-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(core): implement pull request collector (#2)
* chore(deps): add poetry configuration * feat(environment): add github resolver * style(ruff): updated ignored * feat(output): implement * chore(ci): add makefile and coverage runner * test(output): remove engine tocsv * feat(collector): implemented github * style(ruff): evaluated and adjusted rules * chore(make): add verbose testing * chore(init): add * style(ruff): enforce single quotes * style(ruff): add missing type annotations * feat(main): implement * test(output): cover when timestamp is not set * chore(make): add extra verbose pytest output * chore(make): add all and clean * chore(make): add quiet mode to pytest * chore(workflows): add main python workflow * refactor(token): extract token outside of cli parser
- Loading branch information
Showing
17 changed files
with
3,484 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Developer task runner: linting, testing, coverage reports and housekeeping.
# No target produces a file named after itself, so declare them all phony;
# otherwise a same-named file or directory would make `make` skip the recipe.
.PHONY: all test lint format coverage export_requirements clean

all: lint coverage

test:
	python -m pytest -vv

lint:
	python -m ruff check .

format:
	python -m ruff check --fix .

# A single pytest run emits every report: term + html for local use, xml for sonarqube.
coverage:
	python -m pytest -q --cov=prfiesta --cov-report=term --cov-report=html --cov-report=xml

export_requirements:
	poetry export --output requirements.txt --format requirements.txt
	poetry export --with dev --output requirements-dev.txt --format requirements.txt

# -f so that a missing coverage.xml does not abort the rest of the cleanup.
clean:
	rm -f ./coverage.xml
	rm -rf ./htmlcov
	rm -rf ./.pytest_cache
	rm -rf ./.ruff_cache
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import logging | ||
import os | ||
|
||
from rich.logging import RichHandler | ||
|
||
def _resolve_logging_level(raw: str | None) -> int | str:
    """Normalise a ``LOGGING_LEVEL`` environment value for ``logging.basicConfig``.

    ``logging`` only accepts integers or exact upper-case level names, so a
    value such as ``debug`` or ``10`` read straight from the environment would
    raise ``ValueError`` while the package is being imported.

    Returns ``logging.INFO`` when the value is unset or blank, an ``int`` for a
    purely numeric value, otherwise the upper-cased level name.
    """
    value = (raw or '').strip()
    if not value:
        return logging.INFO
    if value.isdigit():
        return int(value)
    return value.upper()


# Runtime configuration, each overridable through an environment variable.
LOGGING_LEVEL = _resolve_logging_level(os.environ.get('LOGGING_LEVEL'))
LOGGING_FORMAT = os.environ.get('LOGGING_FORMAT', '%(message)s')
SPINNER_STYLE = os.environ.get('SPINNER_STYLE', 'blue')

# Configure the root logger once, at package import, to render through rich.
logging.basicConfig(
    level=LOGGING_LEVEL,
    format=LOGGING_FORMAT,
    handlers=[RichHandler(markup=True, show_path=False, show_time=False, show_level=True)],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import logging | ||
from datetime import datetime | ||
|
||
import click | ||
from rich.live import Live | ||
from rich.spinner import Spinner | ||
from rich.text import Text | ||
|
||
from prfiesta import SPINNER_STYLE | ||
from prfiesta.collectors.github import GitHubCollector | ||
from prfiesta.environment import GitHubEnvironment | ||
from prfiesta.output import output_frame | ||
|
||
logger = logging.getLogger(__name__)

# Supplies the token / URL from the process environment when the CLI flags are omitted.
github_environment = GitHubEnvironment()


@click.command()
@click.option('-u', '--users', required=True, multiple=True, help='The GitHub Users to search for. Repeat the option for multiple users e.g -u user1 -u user2')
@click.option('-t', '--token', help='The Authentication token to use')
@click.option('-x', '--url', help='The URL of the Git provider to use')
@click.option('-o', '--output_type', type=click.Choice(['csv', 'parquet']), default='csv', help='The output format')
@click.option('--after', type=click.DateTime(formats=['%Y-%m-%d']), help='Only search for pull requests after this date e.g 2023-01-01')
@click.option('--before', type=click.DateTime(formats=['%Y-%m-%d']), help='Only search for pull requests before this date e.g 2023-04-30')
def main(**kwargs) -> None:
    """
    Command line entry point.

    Collects the pull requests authored by the requested users and, when any
    were found, writes them out in the requested format.
    """
    # click passes a `multiple=True` option as a tuple with one item per occurrence.
    users: tuple[str, ...] = kwargs.get('users')
    token: str = kwargs.get('token') or github_environment.get_token()
    url: str = kwargs.get('url') or github_environment.get_url()
    output_type: str = kwargs.get('output_type')
    before: datetime = kwargs.get('before')
    after: datetime = kwargs.get('after')

    logger.info('[bold green]Pull Request Fiesta 🦜🥳')

    spinner = Spinner('dots', text=Text('Loading', style=SPINNER_STYLE))

    # The collector and the exporter both report progress through the shared spinner.
    with Live(spinner, refresh_per_second=20, transient=True):

        collector = GitHubCollector(token=token, url=url, spinner=spinner)
        pr_frame = collector.collect(users, after=after, before=before)

        logger.info('Found [bold green]%s[/bold green] pull requests!', pr_frame.shape[0])

        # Nothing is written when the search came back empty.
        if not pr_frame.empty:
            output_frame(pr_frame, output_type, spinner=spinner)


if __name__ == '__main__':  # pragma: nocover
    main()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import logging | ||
from datetime import datetime | ||
|
||
import pandas as pd | ||
from github import Github | ||
from rich.spinner import Spinner | ||
|
||
from prfiesta import SPINNER_STYLE | ||
from prfiesta.environment import GitHubEnvironment | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class GitHubCollector:
    """Collects pull requests through the GitHub search API into a pandas DataFrame."""

    def __init__(self, **kwargs) -> None:
        """
        Creates the GitHub client and the column configuration used by `collect`.

        Keyword Arguments
        -----------------
        token
            Authentication token. Falls back to `GitHubEnvironment.get_token()`.
        url
            Base URL of the GitHub API. Falls back to `GitHubEnvironment.get_url()`.
        spinner
            Optional `rich` spinner that receives progress messages.
        """
        environment = GitHubEnvironment()
        token = kwargs.get('token') or environment.get_token()
        self._url = kwargs.get('url') or environment.get_url()

        self._github = Github(token, base_url=self._url)
        self._spinner: Spinner = kwargs.get('spinner')

        # Results are ordered by this column, newest first.
        self._sort_column = ['updated_at']
        # Columns removed from the result (ignored when absent).
        self._drop_columns = [
            'labels_url',
            'comments_url',
            'events_url',
            'node_id',
            'performed_via_github_app',
            'active_lock_reason',
        ]
        # Columns pushed to the right-hand side of the frame.
        self._move_to_end_columns = [
            'url',
            'repository_url',
            'html_url',
            'timeline_url',
        ]
        # Columns converted to datetimes when present and parseable.
        self._datetime_columns = [
            'created_at',
            'updated_at',
            'closed_at',
            'milestone.created_at',
            'milestone.updated_at',
            'milestone.due_on',
            'milestone.closed_at',
        ]

    def collect(self, users: list[str], after: datetime | None = None, before: datetime | None = None) -> pd.DataFrame:
        """
        Searches for pull requests authored by `users` and returns them as a frame.

        `after` / `before` optionally bound the search on the update date.
        Returns an empty DataFrame (and logs a warning) when nothing matched.
        """
        query = self._construct_query(users, after, before)

        self._update_spinner(f'Searching {self._url} with[bold blue] {query}')
        pulls = self._github.search_issues(query=query)

        # NOTE(review): this reads the private `_rawData` attribute of each result
        # rather than a public accessor - presumably to reuse the search payload
        # as-is; confirm against PyGithub before changing it.
        pull_request_data: list[dict] = [pr.__dict__['_rawData'] for pr in pulls]

        if not pull_request_data:
            logger.warning('Did not find any results for this search criteria!')
            return pd.DataFrame()

        self._update_spinner('Post Processing')
        pr_frame = pd.json_normalize(pull_request_data)

        pr_frame = pr_frame.drop(columns=self._drop_columns, errors='ignore')
        pr_frame = pr_frame.sort_values(by=self._sort_column, ascending=False)
        pr_frame = self._parse_datetime_columns(pr_frame)
        # Everything after '/repos/' in the repository URL is kept as the repository name.
        pr_frame['repository_name'] = pr_frame['repository_url'].str.extract(r'(.*)\/repos\/(?P<repository_name>(.*))')['repository_name']
        pr_frame = self._move_column_to_end(pr_frame)

        return pr_frame

    @staticmethod
    def _construct_query(users: list[str], after: datetime | None = None, before: datetime | None = None) -> str:
        """
        Constructs a GitHub Search Query
        that returns pull requests made by the passed users.

        Examples
        --------
        type:pr author:user1
        type:pr author:user2 updated:<=2021-01-01
        type:pr author:user1 author:user2 updated:2021-01-01..2021-03-01

        All dates are inclusive.
        See GitHub Docs for full options https://docs.github.com/en/search-github/searching-on-github/searching-issues-and-pull-requests
        """
        date_format = '%Y-%m-%d'

        query: list[str] = ['type:pr', *(f'author:{u}' for u in users)]

        if before and after:
            query.append(f'updated:{after.strftime(date_format)}..{before.strftime(date_format)}')
        elif before:
            query.append(f'updated:<={before.strftime(date_format)}')
        elif after:
            query.append(f'updated:>={after.strftime(date_format)}')

        return ' '.join(query)

    def _move_column_to_end(self, df: pd.DataFrame) -> pd.DataFrame:
        """Moves the configured columns, in order, to the end of `df` (in place) and returns it."""
        for col in self._move_to_end_columns:
            # Not every response carries every column; skip instead of raising KeyError.
            if col not in df.columns:
                continue

            # Pop first so the insert position is computed on the shortened frame.
            values = df.pop(col)
            df.insert(len(df.columns), col, values)

        return df

    def _parse_datetime_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Converts the configured columns of `df` to datetimes where possible and returns it."""
        for col in self._datetime_columns:
            # json_normalize only creates the nested 'milestone.*' columns when at
            # least one record has a milestone object, so they may be absent.
            if col not in df.columns:
                continue

            try:
                df[col] = pd.to_datetime(df[col])
            except (ValueError, TypeError):
                # Leave unparseable columns untouched (what errors='ignore' used to do).
                continue

        return df

    def _update_spinner(self, message: str, style: str=SPINNER_STYLE) -> None:
        """Shows `message` on the spinner, if one was supplied."""
        if self._spinner:
            self._spinner.update(text=message, style=style)
|
||
|
||
|
||
if __name__ == '__main__':  # pragma: nocover
    # Ad-hoc smoke check of the query builder. `_construct_query` is a static
    # method, so call it on the class: building a collector instance would
    # require a token to be configured just to print a string.
    logger.info(GitHubCollector._construct_query(['kiran94', 'hello'], datetime(2021, 1, 1), datetime(2021, 3, 1)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import os | ||
|
||
from github.Consts import DEFAULT_BASE_URL as GITHUB_DEFAULT_BASE_URL | ||
|
||
|
||
class GitHubEnvironment:
    """Resolves GitHub connection settings from environment variables."""

    def get_token(self) -> str:
        """
        Gets the authentication token for this environment.

        GITHUB_ENTERPRISE_TOKEN takes precedence over GITHUB_TOKEN. A variable
        that is set but empty counts as unset, so an empty enterprise token
        does not hide a valid GITHUB_TOKEN.

        Raises
        ------
        ValueError
            When neither variable holds a non-empty value.
        """
        token = os.environ.get('GITHUB_ENTERPRISE_TOKEN') or os.environ.get('GITHUB_TOKEN')
        if not token:
            raise ValueError('GITHUB_ENTERPRISE_TOKEN or GITHUB_TOKEN must be set')

        return token

    def get_url(self) -> str:
        """
        Gets the URL for the git provider.

        Reads GH_HOST and falls back to the default base URL when it is unset or empty.
        """
        return os.environ.get('GH_HOST') or GITHUB_DEFAULT_BASE_URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import logging | ||
import os | ||
from datetime import datetime | ||
from typing import Literal | ||
|
||
import pandas as pd | ||
from rich.spinner import Spinner | ||
|
||
from prfiesta import SPINNER_STYLE | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
output_directory = 'output' | ||
OUTPUT_TYPE = Literal['csv', 'parquet'] | ||
|
||
|
||
def output_frame(frame: pd.DataFrame, output_type: OUTPUT_TYPE, spinner: Spinner, output_name: str = 'export', timestamp: datetime = None) -> None: | ||
|
||
if not timestamp: | ||
timestamp = datetime.now() | ||
|
||
os.makedirs(output_directory, exist_ok=True) | ||
|
||
output_name = str(output_name) + '.' + timestamp.strftime('%Y-%m-%d_%H:%M:%S') + '.' + output_type | ||
output_name = os.path.join(output_directory, output_name) | ||
|
||
spinner.update(text=f'Writing export to {output_name}', style=SPINNER_STYLE) | ||
|
||
match output_type: | ||
case 'csv': | ||
frame.to_csv(output_name, index=False) | ||
|
||
case 'parquet': | ||
frame.to_parquet(output_name, index=False) | ||
|
||
case _: | ||
raise ValueError('unknown output_type %s', output_type) | ||
|
||
logger.info('Exported to %s!', output_name) |
Oops, something went wrong.