Skip to content

Commit 2150548

Browse files
authored
Merge pull request #407 from github/search-refactor
refactor: Move search functions and tests to their own files
2 parents c6f90db + 0e3c3f3 commit 2150548

File tree

4 files changed

+293
-273
lines changed

4 files changed

+293
-273
lines changed

issue_metrics.py

+1-131
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
Functions:
99
get_env_vars() -> EnvVars: Get the environment variables for use
1010
in the script.
11-
search_issues(search_query: str, github_connection: github3.GitHub, owners_and_repositories: List[dict])
12-
-> github3.structs.SearchIterator:
13-
Searches for issues in a GitHub repository that match the given search query.
1411
get_per_issue_metrics(issues: Union[List[dict], List[github3.issues.Issue]],
1512
discussions: bool = False, labels: Union[List[str], None] = None,
1613
ignore_users: List[str] = []) -> tuple[List, int, int]:
@@ -21,8 +18,6 @@
2118
"""
2219

2320
import shutil
24-
import sys
25-
from time import sleep
2621
from typing import List, Union
2722

2823
import github3
@@ -36,6 +31,7 @@
3631
from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
3732
from markdown_writer import write_to_markdown
3833
from most_active_mentors import count_comments_per_user, get_mentor_count
34+
from search import get_owners_and_repositories, search_issues
3935
from time_to_answer import get_stats_time_to_answer, measure_time_to_answer
4036
from time_to_close import get_stats_time_to_close, measure_time_to_close
4137
from time_to_first_response import (
@@ -46,101 +42,6 @@
4642
from time_to_ready_for_review import get_time_to_ready_for_review
4743

4844

49-
def search_issues(
50-
search_query: str,
51-
github_connection: github3.GitHub,
52-
owners_and_repositories: List[dict],
53-
rate_limit_bypass: bool = False,
54-
) -> List[github3.search.IssueSearchResult]: # type: ignore
55-
"""
56-
Searches for issues/prs/discussions in a GitHub repository that match
57-
the given search query and handles errors related to GitHub API responses.
58-
59-
Args:
60-
search_query (str): The search query to use for finding issues/prs/discussions.
61-
github_connection (github3.GitHub): A connection to the GitHub API.
62-
owners_and_repositories (List[dict]): A list of dictionaries containing
63-
the owner and repository names.
64-
65-
Returns:
66-
List[github3.search.IssueSearchResult]: A list of issues that match the search query.
67-
"""
68-
69-
# Rate Limit Handling: API only allows 30 requests per minute
70-
def wait_for_api_refresh(
71-
iterator: github3.structs.SearchIterator, rate_limit_bypass: bool = False
72-
):
73-
# If the rate limit bypass is enabled, don't wait for the API to refresh
74-
if rate_limit_bypass:
75-
return
76-
77-
max_retries = 5
78-
retry_count = 0
79-
sleep_time = 70
80-
81-
while iterator.ratelimit_remaining < 5:
82-
if retry_count >= max_retries:
83-
raise RuntimeError("Exceeded maximum retries for API rate limit")
84-
85-
print(
86-
f"GitHub API Rate Limit Low, waiting {sleep_time} seconds to refresh."
87-
)
88-
sleep(sleep_time)
89-
90-
# Exponentially increase the sleep time for the next retry
91-
sleep_time *= 2
92-
retry_count += 1
93-
94-
issues_per_page = 100
95-
96-
print("Searching for issues...")
97-
issues_iterator = github_connection.search_issues(
98-
search_query, per_page=issues_per_page
99-
)
100-
wait_for_api_refresh(issues_iterator, rate_limit_bypass)
101-
102-
issues = []
103-
repos_and_owners_string = ""
104-
for item in owners_and_repositories:
105-
repos_and_owners_string += (
106-
f"{item.get('owner', '')}/{item.get('repository', '')} "
107-
)
108-
109-
# Print the issue titles
110-
try:
111-
for idx, issue in enumerate(issues_iterator, 1):
112-
print(issue.title) # type: ignore
113-
issues.append(issue)
114-
115-
# requests are sent once per page of issues
116-
if idx % issues_per_page == 0:
117-
wait_for_api_refresh(issues_iterator, rate_limit_bypass)
118-
119-
except github3.exceptions.ForbiddenError:
120-
print(
121-
f"You do not have permission to view a repository from: '{repos_and_owners_string}'; Check your API Token."
122-
)
123-
sys.exit(1)
124-
except github3.exceptions.NotFoundError:
125-
print(
126-
f"The repository could not be found; Check the repository owner and names: '{repos_and_owners_string}"
127-
)
128-
sys.exit(1)
129-
except github3.exceptions.ConnectionError:
130-
print(
131-
"There was a connection error; Check your internet connection or API Token."
132-
)
133-
sys.exit(1)
134-
except github3.exceptions.AuthenticationFailed:
135-
print("Authentication failed; Check your API Token.")
136-
sys.exit(1)
137-
except github3.exceptions.UnprocessableEntity:
138-
print("The search query is invalid; Check the search query.")
139-
sys.exit(1)
140-
141-
return issues
142-
143-
14445
def get_per_issue_metrics(
14546
issues: Union[List[dict], List[github3.search.IssueSearchResult]], # type: ignore
14647
env_vars: EnvVars,
@@ -264,37 +165,6 @@ def get_per_issue_metrics(
264165
return issues_with_metrics, num_issues_open, num_issues_closed
265166

266167

267-
def get_owners_and_repositories(
268-
search_query: str,
269-
) -> List[dict]:
270-
"""Get the owners and repositories from the search query.
271-
272-
Args:
273-
search_query (str): The search query used to search for issues.
274-
275-
Returns:
276-
List[dict]: A list of dictionaries of owners and repositories.
277-
278-
"""
279-
search_query_split = search_query.split(" ")
280-
results_list = []
281-
for item in search_query_split:
282-
result = {}
283-
if "repo:" in item and "/" in item:
284-
result["owner"] = item.split(":")[1].split("/")[0]
285-
result["repository"] = item.split(":")[1].split("/")[1]
286-
if "org:" in item or "owner:" in item or "user:" in item:
287-
result["owner"] = item.split(":")[1]
288-
if "user:" in item:
289-
result["owner"] = item.split(":")[1]
290-
if "owner:" in item:
291-
result["owner"] = item.split(":")[1]
292-
if result:
293-
results_list.append(result)
294-
295-
return results_list
296-
297-
298168
def main(): # pragma: no cover
299169
"""Run the issue-metrics script.
300170

search.py

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""A module to search for issues in a GitHub repository."""
2+
3+
import sys
4+
from time import sleep
5+
from typing import List
6+
7+
import github3
8+
import github3.structs
9+
10+
11+
def search_issues(
    search_query: str,
    github_connection: github3.GitHub,
    owners_and_repositories: List[dict],
    rate_limit_bypass: bool = False,
) -> List[github3.search.IssueSearchResult]:  # type: ignore
    """
    Searches for issues/prs/discussions in a GitHub repository that match
    the given search query and handles errors related to GitHub API responses.

    Every matching item's title is printed as it is collected. On a GitHub
    API error a short explanation is printed and the process exits with
    status 1.

    Args:
        search_query (str): The search query to use for finding issues/prs/discussions.
        github_connection (github3.GitHub): A connection to the GitHub API.
        owners_and_repositories (List[dict]): A list of dictionaries containing
            the owner and repository names. Only used to build error messages.
        rate_limit_bypass (bool, optional): A flag to bypass the rate limit to be used
            when working with GitHub server that has rate limiting turned off. Defaults to False.

    Returns:
        List[github3.search.IssueSearchResult]: A list of issues that match the search query.

    Raises:
        RuntimeError: If the rate limit is still low after the maximum number of waits.
        SystemExit: If the GitHub API reports a permission, not-found, connection,
            authentication, or invalid-query error.
    """

    # Rate Limit Handling: API only allows 30 requests per minute
    def wait_for_api_refresh(
        iterator: github3.structs.SearchIterator, rate_limit_bypass: bool = False
    ):
        # If the rate limit bypass is enabled, don't wait for the API to refresh
        if rate_limit_bypass:
            return

        max_retries = 5
        retry_count = 0
        sleep_time = 70

        # NOTE(review): ratelimit_remaining is read from the iterator; if it is
        # only refreshed when a new page is requested, it cannot change while
        # we sleep here and this loop will always run to max_retries - confirm.
        while iterator.ratelimit_remaining < 5:
            if retry_count >= max_retries:
                raise RuntimeError("Exceeded maximum retries for API rate limit")

            print(
                f"GitHub API Rate Limit Low, waiting {sleep_time} seconds to refresh."
            )
            sleep(sleep_time)

            # Exponentially increase the sleep time for the next retry
            sleep_time *= 2
            retry_count += 1

    issues_per_page = 100

    print("Searching for issues...")
    issues_iterator = github_connection.search_issues(
        search_query, per_page=issues_per_page
    )
    wait_for_api_refresh(issues_iterator, rate_limit_bypass)

    issues = []

    # Human-readable "owner/repository" list used only in the error messages.
    repos_and_owners_string = " ".join(
        f"{item.get('owner', '')}/{item.get('repository', '')}"
        for item in owners_and_repositories
    )

    # Print the issue titles and add them to the list of issues
    try:
        for idx, issue in enumerate(issues_iterator, 1):
            print(issue.title)  # type: ignore
            issues.append(issue)

            # requests are sent once per page of issues
            if idx % issues_per_page == 0:
                wait_for_api_refresh(issues_iterator, rate_limit_bypass)

    except github3.exceptions.ForbiddenError:
        # Adjacent literals instead of a backslash continuation, which would
        # embed the next line's indentation in the printed message.
        print(
            "You do not have permission to view a repository "
            f"from: '{repos_and_owners_string}'; Check your API Token."
        )
        sys.exit(1)
    except github3.exceptions.NotFoundError:
        print(
            "The repository could not be found; "
            f"Check the repository owner and names: '{repos_and_owners_string}'"
        )
        sys.exit(1)
    except github3.exceptions.ConnectionError:
        print(
            "There was a connection error; Check your internet connection or API Token."
        )
        sys.exit(1)
    except github3.exceptions.AuthenticationFailed:
        print("Authentication failed; Check your API Token.")
        sys.exit(1)
    except github3.exceptions.UnprocessableEntity:
        print("The search query is invalid; Check the search query.")
        sys.exit(1)

    return issues
108+
109+
110+
def get_owners_and_repositories(
    search_query: str,
) -> List[dict]:
    """Get the owners and repositories from the search query.

    The query is split on single spaces and each token is inspected for a
    ``repo:``, ``org:``, ``owner:`` or ``user:`` qualifier. The qualifier
    value is the text between the first and second ``:`` of the token.

    Args:
        search_query (str): The search query used to search for issues.

    Returns:
        List[dict]: A list of dictionaries of owners and repositories, in
            query order. A ``repo:owner/name`` token yields both an "owner"
            and a "repository" key; ``org:``, ``owner:`` and ``user:`` tokens
            yield only "owner". Tokens without a recognised qualifier are
            skipped.

    """
    owner_qualifiers = ("org:", "owner:", "user:")
    results_list = []

    for item in search_query.split(" "):
        # Every recognised qualifier contains a colon, so anything without
        # one can be skipped before indexing into the split.
        if ":" not in item:
            continue

        value = item.split(":")[1]
        result = {}

        # Look for the slash in the value rather than the whole token so a
        # malformed token (e.g. "a/repo:b") is ignored instead of raising
        # IndexError.
        if "repo:" in item and "/" in value:
            owner, repository = value.split("/")[:2]
            result["owner"] = owner
            result["repository"] = repository

        if any(qualifier in item for qualifier in owner_qualifiers):
            result["owner"] = value

        if result:
            results_list.append(result)

    return results_list

0 commit comments

Comments
 (0)