Selenium收藏、Playwright官网教程
自动下载(浏览器、驱动)安装位置:C:\Users\wenjianbao\AppData\Local\ms-playwright\
下载地址
- Playwright(python)微软浏览器自动化教程(一)
- Playwright(python)微软浏览器自动化教程(二) —— 核心概念:Browser、Context、Page、Frame (讲清楚了Context是什么 !!)
- Playwright+Bs4爬取微信公众号文章链接
- 在Playwright测试中切换选项卡 —— 关键方法:page1.bringToFront();
- 【荐】Playwright入门
- Playwright选择器
- 提取Playwright录制文件中的元素定位信息
- 扩展Playwright自动等待方法(1) —— [已放弃] 当重新打开一个页面时,page1.click('#username'); 方法不好使,它不会去等待元素是否有点击的效果,导致 期望的点击效果没出现!
- 扩展Playwright自动等待方法(2)
- Playwright屏幕元素截图并保存至allure报告
- 在Playwright中保持登录状态
- Web自动化测试
- playwright--自动化
- 反爬设置
- 捕获异常
- Playwright上手学习1
- 使用 time.sleep() 会导致超时问题:建议使用 page.wait_for_timeout(毫秒) 替换 time 模块的 time.sleep(秒),这样能明显改善超时问题。因为 Playwright 内部依赖异步操作,而使用 time.sleep(秒) 时无法正确处理这种情况。
- 使用
- Playwright上手学习2
- Playwright上手学习3
- Playwright上手学习4
- 强大易用!新一代爬虫利器 Playwright 的介绍
- Python中web端自动化神器Playwright —— 自动下载 浏览器
- playwright 爬虫使用
- playwright网络爬虫实战案例分享
- Playwright实战案例之爬取js加密数据
- Hello Playwright:(8)等待页面加载
- 爬虫系列(4) playwright使用说明
- 如何用Playwright进行网页抓取?
- 【playwright】使用playwright实现拖动功能(拖动 验证码)
- Playwright - 滚动条操作
- 用playwright实现访问目标网页并截图,提取重要数据处理后一起推送至钉钉
- python发帖技巧_Playwright-python实现多端发帖 一文多发
- Playwright-新一代自动化工具 > 酱紫写爬虫?
- playwright自动化 绕过验证码 cookies登录
- win10系统开发环境快速安装Playwright python的方法
- Playwright: 比 Puppeteer 更好用的浏览器自动化工具
- 爬虫框架Playwright在Java环境下的开发实践
- Java中使用playwright
- Playwright-java 自定义目录和浏览器 兼容win7
- 如何使用Playwright对Java API实现自动视觉测试
自动识别验证码
Nodejs Playwright 2Captcha 验证码识别实现自动登录
基于PlayWright的爬虫项目
- crawloop:基于PlayWright实现对js渲染的动态网页进行抓取,包含网页源码、截图、网站入口、网页交互过程等,支持优先级任务调度。
- BdfbSpider:基于playwright-java开发的多线程爬虫框架,属于playwright-java的高阶用法
(1) PlaywrightHelper.java
package com.wanma.framework_noweb.helper; import cn.hutool.core.collection.ListUtil; import cn.hutool.core.io.FileUtil; import cn.hutool.core.util.ArrayUtil; import cn.hutool.core.util.ObjectUtil; import com.microsoft.playwright.*; import com.wanma.apps.constant.PageName; import lombok.extern.slf4j.Slf4j; import java.util.HashMap; import java.util.Map; /** * Playwright 助手类 */ @Slf4j public class PlaywrightHelper { private static Playwright playwright; private static Browser browser; private static BrowserContext context; private static Page page1; private static Page page2; private static Page page3; private static Page page4; private static Page page5; private static Page page6; private static final String[] pageNames = { PageName.page1, PageName.page2, PageName.page3, PageName.page4, PageName.page5, PageName.page6 }; /** * 实例化 Playwright 单例对象 */ public static Playwright getSinglePlaywright() { if (ObjectUtil.isNull(playwright)) { Map<String, String> envMap = new HashMap<>(); // 跳过下载浏览器,因为公司是内网,这个配置很重要 envMap.put("PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD", "1"); // 跳过下载浏览器后配置浏览器位置 // 默认地址为:C:/Users/wenjianbao/AppData/Local/ms-playwright envMap.put("PLAYWRIGHT_BROWSERS_PATH", "c:/java/ms-playwright"); playwright = Playwright.create(new Playwright.CreateOptions().setEnv(envMap)); } return playwright; } /** * 实例化 浏览器模块 单例对象 */ private static Browser _getSingleBrowser() { if (ObjectUtil.isNull(browser)) { browser = getSinglePlaywright().chromium().launch( new BrowserType.LaunchOptions() .setHeadless(false) // 取消无头模式,这样才能看见浏览器操作 // .setChannel("chrome") .setTimeout(60000) // 超时时间:1分钟 .setArgs(ListUtil.of("--start-maximized")) // 窗口最大化 ); } return browser; } /** * 实例化 浏览器窗口 单例对象 */ private static BrowserContext _getSingleContext() { if (ObjectUtil.isNull(context)) { context = _getSingleBrowser().newContext( new Browser.NewContextOptions() .setViewportSize(null) // 配合前面的 窗口最大化,这里必须设置为null ); } return context; } /** * 新建 页面 */ private static Page _newPage() { Page page = 
_getSingleContext().newPage(); // 超时时间:40秒 page.setDefaultTimeout(40000); page.setDefaultNavigationTimeout(40000); return page; } public static Page getPage(String pageName) { if (!ArrayUtil.contains(pageNames, pageName)) { throw new RuntimeException("参数pageName值错误"); } // 页面2 if (pageName.equals(PageName.page2)) { if (ObjectUtil.isNull(page2) || page2.isClosed()) { page2 = _newPage(); } return page2; } // 页面3 else if (pageName.equals(PageName.page3)) { if (ObjectUtil.isNull(page3) || page3.isClosed()) { page3 = _newPage(); } return page3; } // 页面4 else if (pageName.equals(PageName.page4)) { if (ObjectUtil.isNull(page4) || page4.isClosed()) { page4 = _newPage(); } return page4; } // 页面5 else if (pageName.equals(PageName.page5)) { if (ObjectUtil.isNull(page5) || page5.isClosed()) { page5 = _newPage(); } return page5; } // 页面6 else if (pageName.equals(PageName.page6)) { if (ObjectUtil.isNull(page6) || page6.isClosed()) { page6 = _newPage(); } return page6; } // 页面1 else { if (ObjectUtil.isNull(page1) || page1.isClosed()) { page1 = _newPage(); } return page1; } } /** * 加载 页面URL */ public static void navigate(String pageName, String url) { try { getPage(pageName).bringToFront(); getPage(pageName).navigate(url); } catch (Exception e) { System.out.println("【加载页面超时】" + FileUtil.getLineSeparator() + url); e.printStackTrace(); } } /** * 返回元素的outerHTML值 */ public static String getOuterHTML(ElementHandle element) { if (ObjectUtil.isNull(element)) { return ""; } try { Object html = element.evaluate("node => node.outerHTML"); return StringHelper.toString(html); } catch (Exception e) { System.out.println("【获取outerHTML出错】"); e.printStackTrace(); } return ""; } }
(2) UrlHelper.java
package com.wanma.framework_noweb.helper;

import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.util.StrUtil;

/**
 * URL helper.
 */
public class UrlHelper {

    /** Query parameter appended by {@link #addLanguage(String)}. */
    private static final String LANGUAGE_PARAM = "language=en_US";

    /**
     * Reads one query parameter from a URL.
     *
     * @param url  URL to parse
     * @param name name of the query parameter
     * @return the parameter value converted by {@code StringHelper.toString}, or an empty string
     *         when {@code url} or {@code name} is empty
     */
    public static String getParam(String url, String name) {
        if (StrUtil.isEmpty(url) || StrUtil.isEmpty(name)) {
            return "";
        }
        return StringHelper.toString(UrlBuilder.of(url).getQuery().get(name));
    }

    /**
     * Turns a possibly relative URL into an absolute one and appends the language parameter.
     *
     * <p>A URL counts as absolute only when it starts with {@code http://} or {@code https://}
     * (case-insensitive). Merely containing "http" somewhere, as in {@code /docs/http-client},
     * does not make it absolute, so such paths are prefixed with {@code siteUrl}.
     *
     * @param url     absolute or site-relative URL; surrounding whitespace is ignored
     * @param siteUrl prefix for relative URLs; {@code null} is treated as empty
     * @return the formatted URL, or an empty string when {@code url} is blank
     */
    public static String formatUrl(String url, String siteUrl) {
        if (StrUtil.isBlank(url)) {
            return "";
        }
        String target = url.trim();
        if (!isAbsoluteHttpUrl(target)) {
            // Avoid concatenating the literal text "null" when no site URL is given.
            target = (siteUrl == null ? "" : siteUrl) + target;
        }
        return addLanguage(target);
    }

    /**
     * Appends {@code language=en_US} to the URL, using {@code ?} when the URL has no query
     * string yet and {@code &} otherwise.
     *
     * <p>The parameter is always appended at the very end of the string; a {@code #fragment}
     * is not treated specially.
     *
     * @param url URL to extend; {@code null} is treated as an empty string
     * @return the URL with the language parameter appended
     */
    public static String addLanguage(String url) {
        String target = (url == null) ? "" : url;
        String separator = target.contains("?") ? "&" : "?";
        return target + separator + LANGUAGE_PARAM;
    }

    /** Tells whether the URL starts with an http or https scheme, ignoring case. */
    private static boolean isAbsoluteHttpUrl(String url) {
        return url.regionMatches(true, 0, "http://", 0, 7)
                || url.regionMatches(true, 0, "https://", 0, 8);
    }
}