Skip to content

Commit 897e2a9

Browse files
committed
Iterate
1 parent dc0502e commit 897e2a9

File tree

4 files changed

+38
-26
lines changed

4 files changed

+38
-26
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM apify/actor-node-puppeteer-chrome:latest
1+
FROM apify/actor-node-playwright:latest
22
COPY package*.json ./
33
COPY yarn.lock ./
44
RUN yarn install

index.js

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import express from "express";
22
import { Actor } from "apify";
3-
import { PuppeteerCrawler, RequestList, sleep } from "crawlee";
3+
import { PlaywrightCrawler, RequestList, sleep } from "crawlee";
44

55
const app = express();
66
const port = 3000;
@@ -18,32 +18,25 @@ const initializeCrawler = async () => {
1818
});
1919
const requestQueue = await Actor.openRequestQueue();
2020

21-
const crawler = new PuppeteerCrawler({
21+
const crawler = new PlaywrightCrawler({
2222
requestList,
2323
requestQueue,
24-
useSessionPool: false,
24+
useSessionPool: true,
2525
persistCookiesPerSession: false,
26-
headless: true,
26+
headless: false,
2727
keepAlive: true,
2828
minConcurrency: 5,
29-
maxConcurrency: 15,
30-
launchContext: {
31-
launchOptions: {
32-
defaultViewport: {
33-
width: 1512,
34-
height: 982,
35-
},
36-
},
37-
userAgent:
38-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
39-
},
29+
maxConcurrency: 30,
4030
requestHandler: async ({ request, page }) => {
41-
await Promise.race([
42-
page.waitForNetworkIdle({ idleTime: 500, concurrency: 3 }),
43-
new Promise((_, reject) =>
44-
setTimeout(() => reject(new Error("Timeout")), 10000),
45-
),
46-
]);
31+
await page.route('**/*', (route) => {
32+
if (route.request().resourceType() === 'image') {
33+
route.abort();
34+
} else {
35+
route.continue();
36+
}
37+
});
38+
39+
//sleep(60000);
4740

4841
await page.evaluate(() => {
4942
return window.scrollBy(0, window.innerHeight);
@@ -60,7 +53,7 @@ const initializeCrawler = async () => {
6053
console.log(`Title: ${await page.title()}`);
6154
console.log(`Content: ${content}`);
6255
contentMap.set(request.uniqueKey, content); // Store content with uniqueKey
63-
await requestQueue.markRequestHandled(request);
56+
//await requestQueue.markRequestHandled(request);
6457
},
6558
});
6659

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@
1111
"apify": "^3.2.0",
1212
"crawlee": "^3.10.1",
1313
"express": "^4.19.2",
14-
"puppeteer": "^22.9.0"
14+
"playwright": "^1.44.1"
1515
}
1616
}

yarn.lock

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@
119119
tiny-typed-emitter "^2.1.0"
120120
tslib "^2.4.0"
121121

122-
"@crawlee/browser@3.10.1":
122+
"@crawlee/browser@3.10.1", "@crawlee/browser@^3.10.1":
123123
version "3.10.1"
124124
resolved "https://registry.yarnpkg.com/@crawlee/browser/-/browser-3.10.1.tgz#a7d5416a95b2accfbd751bbcbc8ff4808a09f901"
125125
integrity sha512-FgQbWQcIe787w8HhKYGjf4j3E64OmOJYkoKJa/KdWfSJ03PozAc3Bu8Kw1dSO8J0OOK8JpoBO5SnqazXcbjuzg==
@@ -252,7 +252,7 @@
252252
proper-lockfile "^4.1.2"
253253
tslib "^2.4.0"
254254

255-
"@crawlee/playwright@3.10.1":
255+
"@crawlee/playwright@3.10.1", "@crawlee/playwright@^3.10.1":
256256
version "3.10.1"
257257
resolved "https://registry.yarnpkg.com/@crawlee/playwright/-/playwright-3.10.1.tgz#874f8d375023a06ab5361ee457ac3415a7f143c8"
258258
integrity sha512-NjYlqSVJO31zwoKrjIgce23BYl1rs9+nLPAM9Ppf5fFtEgMPONRCrHSg8WzICXrqT3yX0vU2lxKLTF2qwBdsGQ==
@@ -1402,6 +1402,11 @@ fs-extra@^11.0.0, fs-extra@^11.2.0:
14021402
jsonfile "^6.0.1"
14031403
universalify "^2.0.0"
14041404

1405+
fsevents@2.3.2:
1406+
version "2.3.2"
1407+
resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a"
1408+
integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==
1409+
14051410
function-bind@^1.1.2:
14061411
version "1.1.2"
14071412
resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.2.tgz#2c02d864d97f3ea6c8830c464cbd11ab6eab7a1c"
@@ -2277,6 +2282,20 @@ pkg-dir@^4.2.0:
22772282
dependencies:
22782283
find-up "^4.0.0"
22792284

2285+
playwright-core@1.44.1:
2286+
version "1.44.1"
2287+
resolved "https://registry.yarnpkg.com/playwright-core/-/playwright-core-1.44.1.tgz#53ec975503b763af6fc1a7aa995f34bc09ff447c"
2288+
integrity sha512-wh0JWtYTrhv1+OSsLPgFzGzt67Y7BE/ZS3jEqgGBlp2ppp1ZDj8c+9IARNW4dwf1poq5MgHreEM2KV/GuR4cFA==
2289+
2290+
playwright@^1.44.1:
2291+
version "1.44.1"
2292+
resolved "https://registry.yarnpkg.com/playwright/-/playwright-1.44.1.tgz#5634369d777111c1eea9180430b7a184028e7892"
2293+
integrity sha512-qr/0UJ5CFAtloI3avF95Y0L1xQo6r3LQArLIg/z/PoGJ6xa+EwzrwO5lpNr/09STxdHuUoP2mvuELJS+hLdtgg==
2294+
dependencies:
2295+
playwright-core "1.44.1"
2296+
optionalDependencies:
2297+
fsevents "2.3.2"
2298+
22802299
progress@2.0.3:
22812300
version "2.0.3"
22822301
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"

0 commit comments

Comments
 (0)