PHP笔记网

革命尚未成功,同志仍须努力 | 下载JDK17

作者:Albert.Wen  添加时间:2022-08-26 23:37:09  修改时间:2025-01-17 22:10:19  分类:03.数据采集/爬虫  编辑

【Selenium】Java版 详细教程

样板代码

# 多个元素

// 分页码:a标签
List<WebElement> elementPageNums = EmptyHelper.emptyArrayList();
try {
	elementPageNums = new WebDriverWait(driver, Duration.ofMillis(Timeout.T_10_sec))
		.until(ExpectedConditions.presenceOfAllElementsLocatedBy(By.cssSelector(".s-pagination-container .s-pagination-strip a")));
} catch (Exception e) {
	errMsg = "没有查询到【分页条1】";
	log.error(errMsg, new Throwable());
}

# 单个元素

// 暂停几秒
ThreadUtil.sleep(Timeout.T_3_sec);

// 输入 登录名
try {
	WebElement elementUserName = new WebDriverWait(driver, Duration.ofMillis(Timeout.T_10_sec))
		.until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("#ap_email")));
	if (ObjectUtil.isNotNull(elementUserName)) {
		elementUserName.clear();
		elementUserName.sendKeys(account.getUsername());
	}
} catch (Exception e) {
	errMsg = "未找到【登录名】输入框";
	log.error(errMsg, e);
	return res.setMsg(errMsg);
}

// 暂停几秒
ThreadUtil.sleep(Timeout.T_3_sec);

// 点击 登录.下一步按钮
try {
	WebElement elementLoginNext = new WebDriverWait(driver, Duration.ofMillis(Timeout.T_10_sec))
		.until(ExpectedConditions.elementToBeClickable(By.cssSelector("#continue")));
	if (ObjectUtil.isNotNull(elementLoginNext)) {
		elementLoginNext.click();
	}
} catch (Exception e) {
	errMsg = "未找到【登录.下一步】按钮";
	log.error(errMsg, e);
	return res.setMsg(errMsg);
}

SeleniumHelper.java

package com.wanma.framework_noweb.helper;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.resource.ResourceUtil;
import cn.hutool.core.thread.ThreadUtil;
import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.wanma.framework_noweb.config.Timeout;
import com.wanma.framework_noweb.constant.PageName;
import org.openqa.selenium.WindowType;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Selenium helper class.
 *
 * <p>Owns one lazily created, shared {@link ChromeDriver} and maps logical page names
 * (the {@code PageName} constants) to browser tabs so callers can address tabs by name.
 *
 * <p>All state is held in static fields and no synchronization is performed.
 *
 * @author Albert
 * @date 2022/8/25
 */
public class SeleniumHelper {
    /** Shared driver instance; {@code null} until {@link #getSingleDriver()} creates it. */
    private static ChromeDriver driver;

    /** Logical page name -> browser window handle, kept in insertion order. */
    private static final Map<String, String> pageHandleMap = new LinkedHashMap<>();

    // Six predefined logical pages; switchPage rejects any other name.
    private static final String[] pageNameArr = {
        PageName.page1, PageName.page2, PageName.page3,
        PageName.page4, PageName.page5, PageName.page6
    };

    /**
     * Returns the shared {@link ChromeDriver}, creating and configuring it on first use.
     *
     * <p>On creation the classpath resource {@code stealth.min.js} is registered through the CDP command
     * {@code Page.addScriptToEvaluateOnNewDocument} and the browser window is maximized.
     * If any step after the browser has been launched fails, the browser is quit before the
     * exception propagates, so a failed initialization does not leave a stray Chrome process.
     *
     * @return the shared driver instance
     */
    public static ChromeDriver getSingleDriver() {
        if (ObjectUtil.isNull(driver)) {
            // Load the anti-detection script BEFORE launching the browser: a missing resource
            // then fails fast without a browser process having been started.
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            byte[] jsData = ResourceUtil.readBytes("stealth.min.js");
            String stealth = StrUtil.str(jsData, StandardCharsets.UTF_8);

            ChromeDriver _driver = new ChromeDriver(buildChromeOptions());
            try {
                // Work around anti-crawler checks: inject the script into every new document
                // so the Selenium traces are wiped before page scripts run.
                Map<String, Object> parameters = new HashMap<>();
                parameters.put("source", stealth);
                _driver.executeCdpCommand("Page.addScriptToEvaluateOnNewDocument", parameters);

                // Maximize the browser window
                _driver.manage().window().maximize();
            } catch (RuntimeException e) {
                // Do not leak the freshly started browser when initialization fails.
                try {
                    _driver.quit();
                } catch (RuntimeException quitError) {
                    e.addSuppressed(quitError);
                }
                throw e;
            }

            // Publish only a fully initialized driver.
            driver = _driver;
        }
        return driver;
    }

    /**
     * Builds the Chrome options used for the shared driver.
     */
    private static ChromeOptions buildChromeOptions() {
        ChromeOptions options = new ChromeOptions();

        // Hide the "Chrome is being controlled by automated test software" notice
        options.setExperimentalOption("excludeSwitches", CollectionUtil.toList("enable-automation", "ignore-certificate-errors"));
        // Hide the "disable developer mode extensions" prompt
        options.setExperimentalOption("useAutomationExtension", false);
        // NOTE(review): disables W3C mode; confirm this is still supported by the Selenium/chromedriver versions in use.
        options.setExperimentalOption("w3c", false);
        // Hide the "save password" popup
        Map<String, Object> prefsMap = new HashMap<>();
        prefsMap.put("credentials_enable_service", false);
        prefsMap.put("profile.password_manager_enabled", false);
        options.setExperimentalOption("prefs", prefsMap);

        options.addArguments("lang=zh-CN,zh,zh-TW,en-US,en");
        options.addArguments("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36");
        // This is the switch that tells Chrome to drop the webdriver trace
        options.addArguments("disable-blink-features=AutomationControlled");
        // Ignore certificate errors
        options.addArguments("--ignore-certificate-errors");
        options.addArguments("--disable-extensions");

        // Block resources from youtube, google and similar sites by resolving them to localhost
        options.addArguments("--host-resolver-rules=MAP www.youtube.com 127.0.0.1" +
            ",MAP www.google.com 127.0.0.1" +
            ",MAP cmacgm.matomo.cloud 127.0.0.1" +
            ",MAP ec.walkme.com 127.0.0.1" +
            ",MAP www.googletagmanager.com 127.0.0.1" +
            ",MAP www.freeprivacypolicy.com 127.0.0.1" +
            ",MAP s.go-mpulse.net 127.0.0.1" +
            // ",MAP cdnjs.cloudflare.com 127.0.0.1" +
            ",MAP geolocation.onetrust.com 127.0.0.1"
        );

        return options;
    }

    /**
     * Switches the driver to the tab registered under {@code pageName}, opening a new tab when
     * no handle is registered for that name or the registered handle is no longer open.
     *
     * @param pageName one of the predefined {@code PageName} values
     * @throws IllegalArgumentException if {@code pageName} is not one of the predefined names
     */
    public static void switchPage(String pageName) {
        // Only the predefined page names are accepted
        if (!ArrayUtil.contains(pageNameArr, pageName)) {
            throw new IllegalArgumentException("参数pageName值错误");
        }

        // Special case for the first page: the window that is current when nothing has been
        // registered yet is recorded as page1.
        if (ObjectUtil.isEmpty(pageHandleMap)) {
            pageHandleMap.put(PageName.page1, getSingleDriver().getWindowHandle());
        }

        boolean newHandle = true;
        String pageHandle = pageHandleMap.getOrDefault(pageName, "");

        // Reuse the registered handle only if that window still exists in the browser
        if (StrUtil.isNotEmpty(pageHandle)
            && CollectionUtil.contains(getSingleDriver().getWindowHandles(), pageHandle)) {
            newHandle = false;
        }

        if (newHandle) {
            // Open a new tab (the driver switches to it) and remember its handle
            pageHandle = getSingleDriver().switchTo().newWindow(WindowType.TAB).getWindowHandle();
            pageHandleMap.put(pageName, pageHandle);
        } else {
            getSingleDriver().switchTo().window(pageHandle);
        }
    }

    /**
     * Loads {@code url} in the tab registered under {@code pageName}, making up to 3 attempts.
     *
     * <p>The pause of {@code Timeout.T_2_sec} happens between attempts only: there is no pause
     * after a successful load or after the final failed attempt. Failures are reported by
     * the single-attempt helper; this method itself returns nothing either way.
     *
     * @param pageName logical page name, see {@link #switchPage(String)}
     * @param url      address to load
     */
    public static void navigate(String pageName, String url) {
        int tryMax = 3;
        for (int tryTimes = 1; tryTimes <= tryMax; tryTimes++) {
            if (_navigate(pageName, url)) {
                return;
            }
            if (tryTimes < tryMax) {
                // Back off before the next attempt
                ThreadUtil.sleep(Timeout.T_2_sec);
            }
        }
    }

    /**
     * Performs a single load attempt: switches to the page, then loads the URL.
     *
     * @return {@code true} if no exception was thrown, {@code false} otherwise
     */
    private static boolean _navigate(String pageName, String url) {
        boolean ret = true;
        try {
            switchPage(pageName);
            getSingleDriver().get(url);
        } catch (Exception e) {
            // Any failure (including an invalid page name) is reported on the console and
            // turned into a false result so the caller can retry.
            ret = false;
            System.out.println("【加载页面出错】" + FileUtil.getLineSeparator() + url);
            e.printStackTrace();
        }
        return ret;
    }

    /**
     * Loads {@code url} in the default page ({@code PageName.page1}).
     *
     * @param url address to load
     */
    public static void navigate(String url) {
        navigate(PageName.page1, url);
    }

    /**
     * Returns the URL of the current page with a trailing {@code "/"} removed, if present.
     *
     * @return the current URL without a trailing slash
     */
    public static String getCurrentUrl() {
        return StrUtil.removeSuffix(getSingleDriver().getCurrentUrl(), "/");
    }
}

UrlHelper.java

package com.wanma.framework_noweb.helper;

import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.util.StrUtil;

/**
 * URL helper class: reads query parameters and normalizes URLs by resolving them
 * against a site URL and appending the {@code language=en_US} query parameter.
 */
public class UrlHelper {
    /** Query parameter appended by {@link #addLanguage(String)}. */
    private static final String LANGUAGE_PARAM = "language=en_US";

    /**
     * Returns the value of query parameter {@code name} in {@code url}.
     *
     * @param url  URL to inspect
     * @param name query parameter name
     * @return {@code ""} when {@code url} or {@code name} is null/empty; otherwise the
     *         parameter value converted by {@code StringHelper.toString}
     */
    public static String getParam(String url, String name) {
        if (StrUtil.isEmpty(url) || StrUtil.isEmpty(name)) {
            return "";
        }
        return StringHelper.toString(UrlBuilder.of(url).getQuery().get(name));
    }

    /**
     * Makes {@code url} absolute (by prefixing {@code siteUrl} when it does not already start
     * with {@code http://} or {@code https://}) and appends the language parameter.
     *
     * <p>The scheme is checked at the START of the URL, case-insensitively, so a relative URL
     * that merely contains "http" somewhere (e.g. {@code /redirect?to=http://...}) is still
     * prefixed. A {@code null} site URL is treated as empty.
     *
     * @param url     absolute or site-relative URL
     * @param siteUrl prefix used for relative URLs
     * @return {@code ""} when {@code url} is null/empty, otherwise the formatted URL
     */
    public static String formatUrl(String url, String siteUrl) {
        if (StrUtil.isEmpty(url)) {
            return "";
        }
        if (!isAbsoluteHttpUrl(url)) {
            url = (siteUrl == null ? "" : siteUrl) + url;
        }
        return addLanguage(url);
    }

    /**
     * Appends {@code language=en_US} to {@code url}, choosing the separator from the URL:
     * {@code ?} when there is no query yet, nothing when the URL already ends with
     * {@code ?} or {@code &}, and {@code &} otherwise.
     *
     * @param url URL to extend
     * @return {@code ""} when {@code url} is null/empty (consistent with the other helpers
     *         of this class), otherwise the URL with the language parameter appended
     */
    public static String addLanguage(String url) {
        if (StrUtil.isEmpty(url)) {
            // Avoids producing the literal "null?language=en_US" for a null input.
            return "";
        }
        if (!url.contains("?")) {
            return url + "?" + LANGUAGE_PARAM;
        }
        if (url.endsWith("?") || url.endsWith("&")) {
            // A separator is already in place; do not emit "?&" or "&&".
            return url + LANGUAGE_PARAM;
        }
        return url + "&" + LANGUAGE_PARAM;
    }

    /** Tells whether {@code url} starts with the http or https scheme (case-insensitive). */
    private static boolean isAbsoluteHttpUrl(String url) {
        return url.regionMatches(true, 0, "http://", 0, 7)
            || url.regionMatches(true, 0, "https://", 0, 8);
    }
}