Skip to content

Being polite to APIs #1821

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 47 additions & 4 deletions vulnerabilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import logging
import os
import re
import time
import urllib.request
from collections import defaultdict
from functools import total_ordering
Expand Down Expand Up @@ -49,6 +50,48 @@
cwe_regex = r"CWE-\d+"


# Store the last request time for each domain
last_request_times = {}


def _retry_after_seconds(header_value, default):
    """
    Return the number of seconds to wait for a ``Retry-After`` header value.

    The header may carry either a number of seconds or an HTTP date.
    Return `default` when the header is missing or cannot be parsed.
    The result is never negative so it is always safe to pass to time.sleep().
    """
    if header_value is None:
        return default

    try:
        return max(0, int(header_value))
    except (TypeError, ValueError):
        # Not a plain number of seconds: try the HTTP-date form below.
        pass

    # Local imports keep this helper self-contained.
    from datetime import datetime
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    try:
        retry_at = parsedate_to_datetime(header_value)
    except (TypeError, ValueError):
        return default

    if retry_at.tzinfo is None:
        # HTTP dates are expressed in GMT; treat a naive result as UTC.
        retry_at = retry_at.replace(tzinfo=timezone.utc)
    return max(0, (retry_at - datetime.now(timezone.utc)).total_seconds())


def polite_request(url, method="GET", headers=None, data=None, delay=1, max_retries=3, **kwargs):
    """
    Make an API request while enforcing politeness (delays, retries, logging).

    - Enforces a `delay` in seconds between requests to the same domain,
      including between retries.
    - Retries if a request is rate limited (429 Too Many Requests), waiting
      for the duration given by the ``Retry-After`` header when present.
    - Retries with an exponential backoff if the request raises a
      `requests` exception.
    - Logs rate limits and failures for debugging.

    `method`, `headers` and `data` are passed to requests.request(). Any extra
    keyword argument (such as `json`, `params` or `timeout`) is forwarded
    unchanged to requests.request() too.

    Return the response of the first attempt that is not rate limited. The
    status code is not otherwise checked: this is left to the caller.
    Raise an Exception after `max_retries` unsuccessful attempts.
    """
    # Local import: only `urllib.request` is known to be imported by the module.
    from urllib.parse import urlsplit

    # Key the bookkeeping by host. Fall back to the raw URL when there is no
    # host to extract, rather than crashing before the request is attempted.
    try:
        domain = urlsplit(url).netloc or url
    except ValueError:
        domain = url

    last_error = None

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1

        # Enforce the politeness delay before every attempt, not only the first.
        wait = delay - (time.time() - last_request_times.get(domain, 0))
        if wait > 0:
            time.sleep(wait)

        try:
            response = requests.request(method, url, headers=headers, data=data, **kwargs)
        except requests.exceptions.RequestException as e:
            # A failed attempt may still have reached the server: record it.
            last_request_times[domain] = time.time()
            last_error = e
            logging.error(f"Request failed: {e}")
            if not is_last_attempt:
                time.sleep(2**attempt)
            continue

        # Record every request that reached the server, including rate limited ones.
        last_request_times[domain] = time.time()

        if response.status_code != 429:
            return response

        # 429 Too Many Requests
        if is_last_attempt:
            logging.warning("Rate limited! No retries left.")
            break

        retry_after = _retry_after_seconds(response.headers.get("Retry-After"), delay)
        logging.warning(f"Rate limited! Retrying after {retry_after} seconds.")
        time.sleep(retry_after)

    raise Exception(
        f"Failed to fetch data from {url!r} after {max_retries} retries."
    ) from last_error


@dataclasses.dataclass(order=True, frozen=True)
class AffectedPackage:
vulnerable_package: PackageURL
Expand All @@ -71,7 +114,7 @@ def load_toml(path):


def fetch_yaml(url):
    """
    Fetch the YAML document at `url` and return its data loaded with saneyaml.

    The document is fetched once, through polite_request(), so that the
    per-domain delay and the rate limit retries apply to this call.
    The response status code is not checked here.
    """
    response = polite_request(url)
    return saneyaml.load(response.content)


Expand Down Expand Up @@ -266,7 +309,7 @@ def _get_gh_response(gh_token, graphql_query):
"""
endpoint = "https://api.github.com/graphql"
headers = {"Authorization": f"bearer {gh_token}"}
return requests.post(endpoint, headers=headers, json=graphql_query).json()
return polite_request(endpoint, headers=headers, json=graphql_query).json()
Copy link
Contributor

@TG1999 TG1999 Apr 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why will this work as a GET request and not a POST request?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @TG1999 for spending your time on my PR and spotting this. You are absolutely right: this should be sent as a POST request, and I should have passed `method="POST"`.
I'll update `_get_gh_response()` accordingly to ensure it sends a proper POST request.



def dedupe(original: List) -> List:
Expand Down Expand Up @@ -366,7 +409,7 @@ def fetch_response(url):
"""
Fetch and return `response` from the `url`
"""
response = requests.get(url)
response = polite_request(url)
if response.status_code == HTTPStatus.OK:
return response
raise Exception(f"Failed to fetch data from {url!r} with status code: {response.status_code!r}")
Expand All @@ -389,7 +432,7 @@ def plain_purl(purl):


def fetch_and_read_from_csv(url):
    """
    Fetch the CSV document at `url` and return a `csv.reader` over its rows.

    The document is fetched once, through polite_request(), and its content is
    decoded as UTF-8.
    The response status code is not checked here.
    """
    # Local import: `io` is not known to be imported at the top of the module.
    import io

    response = polite_request(url)
    # polite_request() returns the result of requests.request(), which has no
    # readlines(): decode the body and let csv.reader iterate over its lines.
    # newline="" leaves line endings untranslated, as the csv module expects,
    # so that newlines embedded in quoted fields are preserved.
    text = response.content.decode("utf-8")
    return csv.reader(io.StringIO(text, newline=""))

Expand Down