不灭的火

革命尚未成功,同志仍须努力下载JDK17

作者:AlbertWen  添加时间:2022-08-26 23:37:09  修改时间:2025-03-31 11:01:01  分类:04.数据采集/爬虫  编辑

【Selenium】Java版 详细教程

样板代码

# 多个元素

1
2
3
4
5
6
7
8
9
// 分页码:a标签
List<WebElement> elementPageNums = EmptyHelper.emptyArrayList();
try {
    elementPageNums = new WebDriverWait(driver, Duration.ofMillis(Timeout.T_10_sec))
        .until(ExpectedConditions.presenceOfAllElementsLocatedBy(By.cssSelector(".s-pagination-container .s-pagination-strip a")));
} catch (Exception e) {
    errMsg = "没有查询到【分页条1】";
    log.error(errMsg, new Throwable());
}

# 单个元素

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// 暂停几秒
ThreadUtil.sleep(Timeout.T_3_sec);
 
// 输入 登录名
try {
    WebElement elementUserName = new WebDriverWait(driver, Duration.ofMillis(Timeout.T_10_sec))
        .until(ExpectedConditions.presenceOfElementLocated(By.cssSelector("#ap_email")));
    if (ObjectUtil.isNotNull(elementUserName)) {
        elementUserName.clear();
        elementUserName.sendKeys(account.getUsername());
    }
} catch (Exception e) {
    errMsg = "未找到【登录名】输入框";
    log.error(errMsg, e);
    return res.setMsg(errMsg);
}
 
// 暂停几秒
ThreadUtil.sleep(Timeout.T_3_sec);
 
// 点击 登录.下一步按钮
try {
    WebElement elementLoginNext = new WebDriverWait(driver, Duration.ofMillis(Timeout.T_10_sec))
        .until(ExpectedConditions.elementToBeClickable(By.cssSelector("#continue")));
    if (ObjectUtil.isNotNull(elementLoginNext)) {
        elementLoginNext.click();
    }
} catch (Exception e) {
    errMsg = "未找到【登录.下一步】按钮";
    log.error(errMsg, e);
    return res.setMsg(errMsg);
}

SeleniumHelper.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
package com.wanma.framework_noweb.helper;
 
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.resource.ResourceUtil;
import cn.hutool.core.thread.ThreadUtil;
import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.wanma.framework_noweb.config.Timeout;
import com.wanma.framework_noweb.constant.PageName;
import org.openqa.selenium.WindowType;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
 
/**
 * Selenium 组手类
 *
 * @author Albert
 * @date 2022/8/25
 */
public class SeleniumHelper {
    private static ChromeDriver driver;
    private static final Map<String, String> pageHandleMap = new LinkedHashMap<>();
 
    // 预设置6个页面
    private static final String[] pageNameArr = {
        PageName.page1, PageName.page2, PageName.page3,
        PageName.page4, PageName.page5, PageName.page6
    };
 
    /**
     * 实例化 ChromeDriver 单例对象
     */
    public static ChromeDriver getSingleDriver() {
        if (ObjectUtil.isNull(driver)) {
            // Chrome选项
            ChromeOptions options = new ChromeOptions();
 
            // 浏览器窗口 最大化
            // options.addArguments("--start-maximized");
 
            // 忽略掉证书错误
            // /36.html
            // options.setExperimentalOption("excludeSwitches", "ignore-certificate-errors");
            // options.setExperimentalOption("excludeSwitches", "enable-automation");
 
            // 禁止显示:Chrome 正在受到自动软件的控制
            options.setExperimentalOption("excludeSwitches", CollectionUtil.toList("enable-automation", "ignore-certificate-errors"));
            // 禁止显示:请停用以开发者…
            options.setExperimentalOption("useAutomationExtension", false);
            options.setExperimentalOption("w3c", false);
            // 禁止显示:“保存密码”弹框
            Map<String, Object> prefsMap = new HashMap<>();
            prefsMap.put("credentials_enable_service", false);
            prefsMap.put("profile.password_manager_enabled", false);
            options.setExperimentalOption("prefs", prefsMap);
 
            options.addArguments("lang=zh-CN,zh,zh-TW,en-US,en");
            options.addArguments("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36");
            // options.addArguments("disable-infobars");
            // options.addArguments("--no-sandbox");
            options.addArguments("disable-blink-features=AutomationControlled"); // 就是这一行告诉chrome去掉了webdriver痕迹
            options.addArguments("--ignore-certificate-errors"); // 忽略掉证书错误
            options.addArguments("--disable-extensions");
 
            // 屏蔽 youtube、google等网站的资源
            options.addArguments("--host-resolver-rules=MAP www.youtube.com 127.0.0.1" +
                ",MAP www.google.com 127.0.0.1" +
                ",MAP cmacgm.matomo.cloud 127.0.0.1" +
                ",MAP ec.walkme.com 127.0.0.1" +
                ",MAP www.googletagmanager.com 127.0.0.1" +
                ",MAP www.freeprivacypolicy.com 127.0.0.1" +
                ",MAP www.googletagmanager.com 127.0.0.1" +
                ",MAP s.go-mpulse.net 127.0.0.1" +
                // ",MAP cdnjs.cloudflare.com 127.0.0.1" +
                ",MAP geolocation.onetrust.com 127.0.0.1"
            );
 
            ChromeDriver _driver = new ChromeDriver(options);
 
            // 突破【反爬虫】,抹去 Chrome浏览器 的 Selenium 属性
            Map<String, Object> parameters = new HashMap<>();
            byte[] jsData = ResourceUtil.readBytes("stealth.min.js");
            String stealth = StrUtil.str(jsData, Charset.defaultCharset());
            parameters.put("source", stealth);
            _driver.executeCdpCommand("Page.addScriptToEvaluateOnNewDocument", parameters);
 
            // 浏览器窗口 最大化
            _driver.manage().window().maximize();
 
            driver = _driver;
        }
        return driver;
    }
 
    /**
     * 切换页面
     */
    public static void switchPage(String pageName) {
        // 限制取值访问
        if (!ArrayUtil.contains(pageNameArr, pageName)) {
            throw new RuntimeException("参数pageName值错误");
        }
 
        // 特殊处理第1个页面
        if (ObjectUtil.isEmpty(pageHandleMap)) {
            pageHandleMap.put(PageName.page1, getSingleDriver().getWindowHandle());
        }
 
        boolean newHandle = true;
        String pageHandle = pageHandleMap.getOrDefault(pageName, "");
 
        if (StrUtil.isNotEmpty(pageHandle)
            && CollectionUtil.contains(getSingleDriver().getWindowHandles(), pageHandle)) {
            newHandle = false;
        }
 
        if (newHandle) {
            pageHandle = getSingleDriver().switchTo().newWindow(WindowType.TAB).getWindowHandle();
            pageHandleMap.put(pageName, pageHandle);
        } else {
            getSingleDriver().switchTo().window(pageHandle);
        }
    }
 
    /**
     * 加载页面(失败重试3次)
     */
    public static void navigate(String pageName, String url) {
        int tryMax = 3;
        int tryTimes = 1;
        boolean ret = _navigate(pageName, url);
        while (!ret && (tryTimes < tryMax)) {
            ret = _navigate(pageName, url);
            tryTimes++;
            ThreadUtil.sleep(Timeout.T_2_sec);
        }
    }
 
    private static boolean _navigate(String pageName, String url) {
        boolean ret = true;
        try {
            switchPage(pageName);
            getSingleDriver().get(url);
        } catch (Exception e) {
            ret = false;
            System.out.println("【加载页面出错】" + FileUtil.getLineSeparator() + url);
            e.printStackTrace();
        }
        return ret;
    }
 
    /**
     * 加载页面(默认窗口)
     */
    public static void navigate(String url) {
        navigate(PageName.page1, url);
    }
 
    /**
     * 获取当前页面URL
     */
    public static String getCurrentUrl() {
        return StrUtil.removeSuffix(getSingleDriver().getCurrentUrl(), "/");
    }
}

UrlHelper.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
package com.wanma.framework_noweb.helper;
 
import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.util.StrUtil;
 
/**
 * Url助手类
 */
public class UrlHelper {
    /**
     * 获取参数值
     */
    public static String getParam(String url, String name) {
        if (StrUtil.isEmpty(url) || StrUtil.isEmpty(name)) {
            return "";
        }
        return StringHelper.toString(UrlBuilder.of(url).getQuery().get(name));
    }
 
    /**
     * 格式化URL
     */
    public static String formatUrl(String url, String siteUrl) {
        if (StrUtil.isEmpty(url)) {
            return "";
        }
        if (!StrUtil.contains(url, "http")) {
            url = siteUrl + url;
        }
        return addLanguage(url);
    }
 
    /**
     * 添加 语言参数
     */
    public static String addLanguage(String url) {
        if (!StrUtil.contains(url, "?")) {
            return url + "?language=en_US";
        }
        return url + "&language=en_US";
    }
}