browser - playwright的基本用法 - headless browser 无头浏览器
访问量: 825
参考:
https://playwright.dev/docs/intro#manually
起因
我想要nftgo.io的数据
发现这是一帮杭州人做的。。。
然后找到了这个: checklyhq , 用python脚本抓包的
https://www.checklyhq.com/learn/headless/request-interception/
其实这个正是我想要的
然后发现了 playwright
安装
(注意先进入到工作目录)
(可以先设置代理)npm config set https-proxy http://127.0.0.1:8075
npm i -D @playwright/test
npx playwright install
创建新的测试
注意:
1. 文件要以 .spec.js 结尾
2. 文件要放在 tests 目录下
sg552@SKYUSER-DSBR6H5:/workspace/playwright$ cat tests/example.spec.js const { test, expect } = require('@playwright/test'); test('basic test', async ({ page }) => { await page.goto('https://playwright.dev/'); const title = page.locator('.navbar__inner .navbar__title'); await expect(title).toHaveText('Playwright'); });
运行
npx playwright test --headed
Running 1 test using 1 worker
  ok tests\example.spec.js:3:1 › basic test (15s)
  1 passed (16s)
会看到一个浏览器窗口弹出来,太NB了。
如何打开页面,等待页面加载?
如何保持页面的登录状态?
如何只运行某个test 文件?
refer to: https://playwright.dev/docs/running-tests#run-specific-tests
npx playwright test some_test.spec.js
如何只运行某个文件夹?
npx playwright test folder1 folder2
如何获得某个child 元素?
refer to: https://playwright.dev/docs/locators#filter-by-childdescendant
或者: page.$$
refer to: https://stackoverflow.com/questions/66702513/playwright-find-multiple-elements-or-class-names
如何根据文字获得element?
refer to: https://playwright.dev/docs/api/class-page#page-get-by-text
根据css selector 获得元素?
见下面代码
如何让页面等待?
await page.waitForTimeout(3000)
得,好多内容都没了。。。
直接来个代码例子吧:
const { test, expect } = require('@playwright/test');

/**
 * Demo flow against damai.cn: log in with a password, open a show's detail
 * page, pick the 120.00 ticket tier, click "buy", then stop on the seat
 * selection page so the seat can be chosen by hand.
 *
 * The short waitForTimeout() pauses exist only so a human can follow the
 * headed browser; they are not synchronisation points.
 */
test('basic test', async ({ page }) => {
  // Allow 120 s for the whole test.
  // NOTE(review): the fixed pauses below already add up to 108 s, so slow page
  // loads may exceed this budget — confirm the value is large enough.
  test.setTimeout(120000);

  // 1. Log in. The login form is rendered inside an iframe, so every field is
  //    addressed through a frame locator.
  const url = 'https://passport.damai.cn/login?ru=https%3A%2F%2Fwww.damai.cn%2F';
  await page.goto(url);
  console.info("== 为了演示,该页面停留 1s");
  await page.waitForTimeout(1000);

  const loginFrame = page.frameLocator('#alibaba-login-box');
  await loginFrame.getByText("密码登录").click();
  // Placeholder credentials: replace with a real account before running.
  await loginFrame.getByLabel('请输入手机号或邮箱').fill("13.....");
  await loginFrame.getByLabel('请输入登录密码').fill(".....");
  await loginFrame.getByRole('button', { name: '登录' }).click();
  await expect(page.getByText("首页")).toBeVisible();

  // 2. Open the show's detail page and choose the ticket tier.
  const url2 = 'https://detail.damai.cn/item.htm?spm=a2oeg.search_category.0.0.1f6db885HF26ws&id=747858504463&clicktitle=%E2%80%9C%E7%BE%8E%E5%A5%BD%E7%9A%84%E6%97%B6%E5%85%89%E2%80%9D%E5%9B%BD%E5%AE%B6%E5%A4%A7%E5%89%A7%E9%99%A2%E5%90%88%E5%94%B1%E5%9B%A214%E5%91%A8%E5%B9%B4%E9%9F%B3%E4%B9%90%E4%BC%9A';
  await page.goto(url2);
  console.info("== 为了演示,该页面停留 1s");
  await page.waitForTimeout(1000);

  // exact: true so that only the element whose full text is the price matches.
  const target1 = page.getByText('120.00元', { exact: true });
  console.info("=== target1: ", target1);
  await target1.click();

  // 3. If the show is on sale, click the buy link straight away.
  const start_to_buy = page.locator('.buy-link');
  await start_to_buy.click();
  console.info("== start_to_buy: ", start_to_buy);
  console.info("== 为了演示,该页面停留 5s");
  await page.waitForTimeout(5000);

  // 4. After clicking buy, the site redirects to the seat-selection page:
  // https://m.damai.cn/app/dmfe/select-seat-biz/kylin.html?itemId=747858504463&userPromotion=true&toDxOrder=true&quickBuy=0&privilegeActId=&channel=damai_app&performId=212348031&skuId=5330323643730&projectId=218328001&rtc=0
  await page.getByText('120.00元').click();
  console.info("== 为了演示,该页面停留 1s, 准备点击票价了");

  // For shows that require seat selection the sequence is: click the price,
  // click a seat, then buy. Picking the seat manually is recommended.
  await page.waitForTimeout(1000);
  await page.locator('.select-seat__container').click({ position: { x: 10, y: 10 } });
  console.info("== 为了演示,该页面停留 1s, 准备点击票价了");

  // Awaited so that a failure inside the page surfaces as a test failure
  // instead of an unhandled rejection.
  // NOTE(review): Playwright auto-dismisses dialogs when no 'dialog' listener
  // is registered, so this alert will probably only flash — confirm whether a
  // listener is wanted here.
  await page.evaluate(() => { alert("请手动选择座位") });

  // Keep the browser open long enough for the seat to be picked by hand.
  await page.waitForTimeout(100000);
});
也可以不使用test, 直接运行脚本:
下面的脚本,会针对某个网站抓取打开该网站所访问的所有URL:
/**
 * Prints every URL requested while opening a target website, together with
 * per-category request counts (CSS, JS, XHR, image).
 *
 * Setup:
 *   1. Node.js (20.16.0)
 *   2. npm install playwright   (written against 1.45.3)
 *   3. npx playwright install   (installs the browsers Playwright drives)
 *   4. Install the system libraries Node.js prompts for; on Ubuntu 22:
 *      sudo apt-get install libatk1.0-0 libatk-bridge2.0-0 libcups2 libatspi2.0-0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 libgbm1 libxkbcommon0 libpango-1.0-0 libcairo2 libasound2
 *   5. node ~/get_request_xxx.js prints the result.
 *
 * Sample output at the time of writing: CSS: 19, ...
 */
const { chromium } = require('playwright');

// Anchored to the end of the path so that URLs which merely contain "png",
// "jpg", ... somewhere (e.g. "/pngquant.js") are not counted as images.
const IMAGE_EXTENSION = /\.(?:jpe?g|gif|png)$/;

// How long to wait after printing before the browser is closed.
const CLOSE_DELAY_MS = 10000;

/**
 * Decides which counter a request belongs to.
 *
 * @param {string} url - Full request URL.
 * @param {string} resourceType - Value of Playwright's request.resourceType().
 * @returns {'css'|'image'|'js'|'xhr'|null} Counter name, or null if uncounted.
 */
function classifyRequest(url, resourceType) {
  // Drop the query string and fragment so "app.js?v=3" still counts as JS.
  const path = url.split(/[?#]/, 1)[0].toLowerCase();

  if (path.endsWith('.css')) return 'css';
  if (IMAGE_EXTENSION.test(path)) return 'image';
  if (path.endsWith('.js')) return 'js';
  if (resourceType === 'xhr') return 'xhr';
  return null;
}

(async () => {
  const browser = await chromium.launch({ headless: true });

  try {
    const page = await browser.newPage();
    const counts = { css: 0, js: 0, xhr: 0, image: 0 };
    const requestURLs = []; // every requested URL, in request order

    // Registered before goto() so that no request is missed.
    page.on('request', (request) => {
      const url = request.url();
      requestURLs.push(url);

      const category = classifyRequest(url, request.resourceType());
      if (category !== null) {
        counts[category] += 1;
      }
    });

    await page.goto('https://xxx.com');
    await page.waitForLoadState('networkidle');

    const total = counts.css + counts.js + counts.xhr + counts.image;
    console.log(`CSS: ${counts.css}, JS: ${counts.js}, XHR: ${counts.xhr}, image: ${counts.image}, Total: ${total}`);

    // Print every requested URL.
    for (const url of requestURLs) {
      console.log(`url: ${url}`);
    }

    // Wait 10 seconds before closing the browser.
    await new Promise((resolve) => setTimeout(resolve, CLOSE_DELAY_MS));
  } finally {
    // Always release the browser, even when navigation fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});