Skip to content

Commit

Permalink
chg: merge generic captcha sitekey into one loop
Browse files (browse the repository at this point in the history)
  • Loading branch information
Rafiot committed Mar 14, 2024
1 parent 7f6b135 commit 3df4ef4
Showing 1 changed file with 12 additions and 19 deletions.
31 changes: 12 additions & 19 deletions har2tree/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,25 +265,18 @@ def find_identifiers(html_doc: bytes) -> dict[str, list[str]] | None:
return None
to_return: dict[str, list[str]] = defaultdict(list)

if recaptchas := soup.select(".g-recaptcha"):
# We should have only one recaptcha per page, but have you seen the web?
for recaptcha in recaptchas:
if sitekey := recaptcha.get('data-sitekey'):
if isinstance(sitekey, list):
# Should not happen, but once again...
to_return['recaptcha'] += sitekey
else:
to_return['recaptcha'].append(sitekey)

if recaptchas := soup.select(".h-captcha"):
# We should have only one recaptcha per page, but have you seen the web?
for recaptcha in recaptchas:
if sitekey := recaptcha.get('data-sitekey'):
if isinstance(sitekey, list):
# Should not happen, but once again...
to_return['hcaptcha'] += sitekey
else:
to_return['hcaptcha'].append(sitekey)
default_captchas = ['g-recaptcha', 'h-captcha', 'cf-turnstile']
for captcha in default_captchas:
if captchas := soup.select(f".{captcha}"):
# We should have only one captcha per page, but have you seen the web?
for c in captchas:
if sitekey := c.get('data-sitekey'):
if isinstance(sitekey, list):
# Should not happen, but once again...
to_return[captcha] += sitekey
else:
to_return[captcha].append(sitekey)

return to_return


Expand Down

0 comments on commit 3df4ef4

Please sign in to comment.