Selenium收藏、Playwright官网教程
自动下载(浏览器、驱动)安装位置:C:\Users\wenjianbao\AppData\Local\ms-playwright\ (下载地址)
- Playwright(python)微软浏览器自动化教程(一)
- Playwright(python)微软浏览器自动化教程(二) —— 核心概念:Browser、Context、Page、Frame (讲清楚了Context是什么 !!)
- Playwright+Bs4爬取微信公众号文章链接
- 在Playwright测试中切换选项卡 —— 关键方法:page1.bringToFront();
- 【荐】Playwright入门
- Playwright选择器
- 提取Playwright录制文件中的元素定位信息
- 扩展Playwright自动等待方法(1) —— [已放弃] 当重新打开一个页面时,page1.click('#username'); 方法不好使,它不会去等待元素是否有点击的效果,导致 期望的点击效果没出现!
- 扩展Playwright自动等待方法(2)
- Playwright屏幕元素截图并保存至allure报告
- 在Playwright中保持登录状态
- Web自动化测试
- playwright--自动化
- 反爬设置
- 捕获异常
- Playwright上手学习1
- 使用
time.sleep()会导致超时问题:
建议使用 page.wait_for_timeout(毫秒) 替换 time 模块的 time.sleep(秒),可以明显改善超时问题。因为 Playwright 内部依赖异步操作,而 time.sleep(秒) 会阻塞当前线程,使这些异步操作在等待期间无法被正确处理。
- 使用
- Playwright上手学习2
- Playwright上手学习3
- Playwright上手学习4
- 强大易用!新一代爬虫利器 Playwright 的介绍
- Python中web端自动化神器Playwright —— 自动下载 浏览器
- playwright 爬虫使用
- playwright网络爬虫实战案例分享
- Playwright实战案例之爬取js加密数据
- Hello Playwright:(8)等待页面加载
- 爬虫系列(4) playwright使用说明
- 如何用Playwright进行网页抓取?
- 【playwright】使用playwright实现拖动功能(拖动 验证码)
- Playwright - 滚动条操作
- 用playwright实现访问目标网页并截图,提取重要数据处理后一起推送至钉钉
- python发帖技巧_Playwright-python实现多端发帖 一文多发
- Playwright-新一代自动化工具 > 酱紫写爬虫?
- playwright自动化 绕过验证码 cookies登录
- win10系统开发环境快速安装Playwright python的方法
- Playwright: 比 Puppeteer 更好用的浏览器自动化工具
- 爬虫框架Playwright在Java环境下的开发实践
- Java中使用playwright
- Playwright-java 自定义目录和浏览器 兼容win7
- 如何使用Playwright对Java API实现自动视觉测试
自动识别验证码
Nodejs Playwright 2Captcha 验证码识别实现自动登录
基于PlayWright的爬虫项目
- crawloop:基于PlayWright实现对js渲染的动态网页进行抓取,包含网页源码、截图、网站入口、网页交互过程等,支持优先级任务调度。
- BdfbSpider:基于playwright-java开发的多线程爬虫框架,属于playwright-java的高阶用法
(1) PlaywrightHelper.java
package com.wanma.framework_noweb.helper;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.ObjectUtil;
import com.microsoft.playwright.*;
import com.wanma.apps.constant.PageName;
import lombok.extern.slf4j.Slf4j;
import java.util.HashMap;
import java.util.Map;
/**
* Playwright 助手类
*/
@Slf4j
public class PlaywrightHelper {
private static Playwright playwright;
private static Browser browser;
private static BrowserContext context;
private static Page page1;
private static Page page2;
private static Page page3;
private static Page page4;
private static Page page5;
private static Page page6;
private static final String[] pageNames = {
PageName.page1, PageName.page2, PageName.page3,
PageName.page4, PageName.page5, PageName.page6
};
/**
* 实例化 Playwright 单例对象
*/
public static Playwright getSinglePlaywright() {
if (ObjectUtil.isNull(playwright)) {
Map<String, String> envMap = new HashMap<>();
// 跳过下载浏览器,因为公司是内网,这个配置很重要
envMap.put("PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD", "1");
// 跳过下载浏览器后配置浏览器位置
// 默认地址为:C:/Users/wenjianbao/AppData/Local/ms-playwright
envMap.put("PLAYWRIGHT_BROWSERS_PATH", "c:/java/ms-playwright");
playwright = Playwright.create(new Playwright.CreateOptions().setEnv(envMap));
}
return playwright;
}
/**
* 实例化 浏览器模块 单例对象
*/
private static Browser _getSingleBrowser() {
if (ObjectUtil.isNull(browser)) {
browser = getSinglePlaywright().chromium().launch(
new BrowserType.LaunchOptions()
.setHeadless(false) // 取消无头模式,这样才能看见浏览器操作
// .setChannel("chrome")
.setTimeout(60000) // 超时时间:1分钟
.setArgs(ListUtil.of("--start-maximized")) // 窗口最大化
);
}
return browser;
}
/**
* 实例化 浏览器窗口 单例对象
*/
private static BrowserContext _getSingleContext() {
if (ObjectUtil.isNull(context)) {
context = _getSingleBrowser().newContext(
new Browser.NewContextOptions()
.setViewportSize(null) // 配合前面的 窗口最大化,这里必须设置为null
);
}
return context;
}
/**
* 新建 页面
*/
private static Page _newPage() {
Page page = _getSingleContext().newPage();
// 超时时间:40秒
page.setDefaultTimeout(40000);
page.setDefaultNavigationTimeout(40000);
return page;
}
public static Page getPage(String pageName) {
if (!ArrayUtil.contains(pageNames, pageName)) {
throw new RuntimeException("参数pageName值错误");
}
// 页面2
if (pageName.equals(PageName.page2)) {
if (ObjectUtil.isNull(page2) || page2.isClosed()) {
page2 = _newPage();
}
return page2;
}
// 页面3
else if (pageName.equals(PageName.page3)) {
if (ObjectUtil.isNull(page3) || page3.isClosed()) {
page3 = _newPage();
}
return page3;
}
// 页面4
else if (pageName.equals(PageName.page4)) {
if (ObjectUtil.isNull(page4) || page4.isClosed()) {
page4 = _newPage();
}
return page4;
}
// 页面5
else if (pageName.equals(PageName.page5)) {
if (ObjectUtil.isNull(page5) || page5.isClosed()) {
page5 = _newPage();
}
return page5;
}
// 页面6
else if (pageName.equals(PageName.page6)) {
if (ObjectUtil.isNull(page6) || page6.isClosed()) {
page6 = _newPage();
}
return page6;
}
// 页面1
else {
if (ObjectUtil.isNull(page1) || page1.isClosed()) {
page1 = _newPage();
}
return page1;
}
}
/**
* 加载 页面URL
*/
public static void navigate(String pageName, String url) {
try {
getPage(pageName).bringToFront();
getPage(pageName).navigate(url);
} catch (Exception e) {
System.out.println("【加载页面超时】" + FileUtil.getLineSeparator() + url);
e.printStackTrace();
}
}
/**
* 返回元素的outerHTML值
*/
public static String getOuterHTML(ElementHandle element) {
if (ObjectUtil.isNull(element)) {
return "";
}
try {
Object html = element.evaluate("node => node.outerHTML");
return StringHelper.toString(html);
} catch (Exception e) {
System.out.println("【获取outerHTML出错】");
e.printStackTrace();
}
return "";
}
}
(2) UrlHelper.java
package com.wanma.framework_noweb.helper;
import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.util.StrUtil;
/**
 * URL helper utilities.
 */
public class UrlHelper {
    /** Scheme prefixes that mark a URL as already absolute. */
    private static final String[] ABSOLUTE_PREFIXES = {"http://", "https://"};

    /**
     * Reads a query parameter from a URL.
     *
     * @param url the URL to inspect
     * @param name the query parameter name
     * @return the parameter value converted by {@code StringHelper.toString}, or an
     *     empty string when {@code url} or {@code name} is null or empty
     */
    public static String getParam(String url, String name) {
        if (StrUtil.isEmpty(url) || StrUtil.isEmpty(name)) {
            return "";
        }
        return StringHelper.toString(UrlBuilder.of(url).getQuery().get(name));
    }

    /**
     * Makes a URL absolute and appends the language parameter.
     *
     * <p>A URL that does not start with {@code http://} or {@code https://} is prefixed
     * with {@code siteUrl}. The check looks at the start of the URL only, so a relative
     * URL that merely contains "http" (for example in a redirect parameter) is still
     * treated as relative.
     *
     * @param url the absolute or site-relative URL
     * @param siteUrl the prefix for relative URLs; null is treated as empty
     * @return the formatted URL, or an empty string when {@code url} is null or empty
     */
    public static String formatUrl(String url, String siteUrl) {
        if (StrUtil.isEmpty(url)) {
            return "";
        }
        if (!isAbsoluteHttpUrl(url)) {
            // Avoid the literal text "null" ending up in the URL.
            url = (siteUrl == null ? "" : siteUrl) + url;
        }
        return addLanguage(url);
    }

    /**
     * Appends {@code language=en_US} to a URL, using {@code ?} when the URL has no
     * question mark yet and {@code &} otherwise.
     *
     * @param url the URL to extend
     * @return the URL with the language parameter appended, or an empty string when
     *     {@code url} is null
     */
    public static String addLanguage(String url) {
        if (url == null) {
            return "";
        }
        String separator = url.contains("?") ? "&" : "?";
        return url + separator + "language=en_US";
    }

    /** Tells whether the URL starts with an http(s) scheme, ignoring case and surrounding whitespace. */
    private static boolean isAbsoluteHttpUrl(String url) {
        String candidate = url.trim();
        for (String prefix : ABSOLUTE_PREFIXES) {
            if (candidate.regionMatches(true, 0, prefix, 0, prefix.length())) {
                return true;
            }
        }
        return false;
    }
}