From 47c670f7e556e7c00f5899621f3a713b820c0a8e Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Wed, 24 Apr 2024 19:18:23 +0100 Subject: [PATCH] Add a wait_for option to browser jobs (#810) This waits for the default timeout of 30 seconds for the selector to be satisfied. The selector is based on https://playwright.dev/python/docs/locators#locate-by-css-or-xpath. Signed-off-by: James Hewitt --- CHANGELOG.md | 1 + docs/source/jobs.rst | 3 +++ lib/urlwatch/jobs.py | 7 ++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b119a74..6a099511 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/ - New `enabled` option for all jobs. Set to false to disable a job without needing to remove it or comment it out (Requested in #625 by snowman, contributed in #785 by jamstah) - New option `ignore_incomplete_reads` (Requested in #725 by wschoot, contributed in #787 by wfrisch) +- New option `wait_for` in browser jobs (Requested in #763 by yuis-ice, contributed in #810 by jamstah) ### Changed diff --git a/docs/source/jobs.rst b/docs/source/jobs.rst index a97d6ac9..5b091416 100644 --- a/docs/source/jobs.rst +++ b/docs/source/jobs.rst @@ -90,6 +90,9 @@ Job-specific optional keys: - ``wait_until``: Either ``load``, ``domcontentloaded``, ``networkidle``, or ``commit`` (see :ref:`advanced_topics`) +- ``wait_for``: A CSS or XPath selector based on the + `Playwright Locator <https://playwright.dev/python/docs/locators#locate-by-css-or-xpath>`_ + spec. The job will wait up to the default timeout of 30 seconds for the selector to be satisfied. 
- ``useragent``: ``User-Agent`` header used for requests (otherwise browser default is used) - ``browser``: Either ``chromium``, ``chrome``, ``chrome-beta``, ``msedge``, ``msedge-beta``, ``msedge-dev``, ``firefox``, ``webkit`` (must be installed with ``playwright install``) diff --git a/lib/urlwatch/jobs.py b/lib/urlwatch/jobs.py index 003c8b82..966aab85 100644 --- a/lib/urlwatch/jobs.py +++ b/lib/urlwatch/jobs.py @@ -413,7 +413,7 @@ class BrowserJob(Job): __required__ = ('navigate',) - __optional__ = ('wait_until', 'useragent', 'browser') + __optional__ = ('wait_until', 'wait_for', 'useragent', 'browser') def get_location(self): return self.user_visible_url or self.navigate @@ -433,4 +433,9 @@ def retrieve(self, job_state): self.wait_until = 'networkidle' page.goto(self.navigate, wait_until=self.wait_until) + + if self.wait_for: + locator = page.locator(self.wait_for) + locator.wait_for() + return page.content()