Skip to content

Commit

Permalink
Merge branch 'main' into acouch/artillery-csv-load
Browse files Browse the repository at this point in the history
  • Loading branch information
coilysiren authored Nov 7, 2024
2 parents eceda54 + 4b5f330 commit 082ab2b
Show file tree
Hide file tree
Showing 73 changed files with 2,856 additions and 1,317 deletions.
1 change: 1 addition & 0 deletions .github/workflows/cd-analytics-infra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
name: Deploy Infrastructure
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
directory: ["database", "service"]
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || fromJSON('["dev", "staging"]') }} # deploy prod on releases, otherwise deploy staging and dev
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/cd-analytics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ jobs:
uses: ./.github/workflows/deploy.yml
strategy:
max-parallel: 1
fail-fast: false
matrix:
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || github.ref_name == 'main' && fromJSON('["dev", "staging"]') || fromJSON('["dev"]') }}
with:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/cd-api-infra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ jobs:
name: Deploy Infrastructure
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
directory: ["database", "service"]
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || fromJSON('["dev", "staging"]') }} # deploy prod on releases, otherwise deploy staging and dev
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/cd-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
uses: ./.github/workflows/deploy.yml
strategy:
max-parallel: 1
fail-fast: false
matrix:
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || github.ref_name == 'main' && fromJSON('["dev", "staging"]') || fromJSON('["dev"]') }}
with:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/cd-frontend-infra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ jobs:
name: Deploy Infrastructure
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
directory: ["service"]
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || fromJSON('["dev", "staging"]') }} # deploy prod on releases, otherwise deploy staging and dev
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/cd-frontend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ jobs:
uses: ./.github/workflows/deploy.yml
strategy:
max-parallel: 1
fail-fast: false
matrix:
envs: ${{ github.event_name == 'release' && fromJSON('["prod"]') || github.ref_name == 'main' && fromJSON('["dev", "staging"]') || fromJSON('["dev"]') }}
with:
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ jobs:
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4 # Uploads repository content to the runner
with:
sparse-checkout: |
.github
- uses: actions/labeler@v4
- uses: actions/checkout@v4 # Uploads repository content to the runner
with:
sparse-checkout: |
.github
- uses: actions/labeler@v5
56 changes: 20 additions & 36 deletions analytics/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,16 @@ ROADMAP_PROJECT ?= 12
OUTPUT_DIR ?= data
CONFIG_DIR ?= config
PROJECT_CONFIG_FILE ?= $(CONFIG_DIR)/github-projects.json
SPRINT_FILE ?= $(OUTPUT_DIR)/sprint-data.json
ROADMAP_FILE ?= $(OUTPUT_DIR)/roadmap-data.json
ISSUE_FILE ?= $(OUTPUT_DIR)/issue-data.json
DELIVERY_FILE ?= $(OUTPUT_DIR)/delivery-data.json
ISSUE_FILE ?= $(OUTPUT_DIR)/delivery-data.json
SPRINT ?= @current
# Names of the points and sprint fields in the GitHub project
POINTS_FIELD ?= Points
POINTS_FIELD ?= Story Points
SPRINT_FIELD ?= Sprint
UNIT ?= points
ACTION ?= show-results
MIN_TEST_COVERAGE ?= 80
APP_NAME ?= grants-analytics
EFFECTIVE_DATE ?= $(shell date +"%Y-%m-%d")

# Required for CI to work properly
SHELL = /bin/bash -o pipefail
Expand Down Expand Up @@ -146,50 +144,38 @@ lint: ## runs code quality checks
# Data Commands #
#################

sprint-data-export:
@echo "=> Exporting project data from the sprint board"
init-db:
@echo "=> Initializing the database schema"
@echo "====================================================="
$(POETRY) analytics etl initialize_database
@echo "====================================================="
$(POETRY) analytics export gh_project_data \
--owner $(ORG) \
--project $(SPRINT_PROJECT) \
--output-file $(SPRINT_FILE)

gh-db-data-import:
@echo "=> Importing sprint data to the database"
gh-transform-and-load:
@echo "=> Transforming and loading GitHub data into the database"
@echo "====================================================="
$(POETRY) analytics etl transform_and_load \
--issue-file $(ISSUE_FILE) \
--effective-date $(EFFECTIVE_DATE)
@echo "====================================================="
$(POETRY) analytics import db_import --delivery-file $(DELIVERY_FILE)

roadmap-data-export:
@echo "=> Exporting project data from the product roadmap"
gh-db-data-import:
@echo "=> Importing sprint data to the database"
@echo "====================================================="
$(POETRY) analytics export gh_project_data \
--owner $(ORG) \
--project $(ROADMAP_PROJECT) \
--output-file $(ROADMAP_FILE)
$(POETRY) analytics import db_import --delivery-file $(ISSUE_FILE)

delivery-data-export:
gh-data-export:
@echo "=> Exporting GitHub issue and sprint data for delivery metrics"
@echo "====================================================="
$(POETRY) analytics export gh_delivery_data \
--config-file $(PROJECT_CONFIG_FILE) \
--output-file $(DELIVERY_FILE) \
--output-file $(ISSUE_FILE) \
--temp-dir $(OUTPUT_DIR)

issue-data-export:
@echo "=> Exporting issue data from the repository"
@echo "====================================================="
$(POETRY) analytics export gh_issue_data \
--owner $(ORG) \
--repo $(REPO) \
--output-file $(ISSUE_FILE)

gh-data-export: sprint-data-export issue-data-export roadmap-data-export delivery-data-export

sprint-burndown:
@echo "=> Running sprint burndown report for HHS/13"
@echo "====================================================="
$(POETRY) analytics calculate sprint_burndown \
--issue-file $(DELIVERY_FILE) \
--issue-file $(ISSUE_FILE) \
--output-dir $(OUTPUT_DIR) \
--sprint "$(SPRINT)" \
--project 13 \
Expand All @@ -199,7 +185,7 @@ sprint-burndown:
@echo "=> Running sprint burndown report for HHS/17"
@echo "====================================================="
$(POETRY) analytics calculate sprint_burndown \
--issue-file $(DELIVERY_FILE) \
--issue-file $(ISSUE_FILE) \
--output-dir $(OUTPUT_DIR) \
--sprint "$(SPRINT)" \
--project 17 \
Expand All @@ -210,8 +196,6 @@ percent-complete:
@echo "=> Running percent complete deliverable"
@echo "====================================================="
$(POETRY) analytics calculate deliverable_percent_complete \
--sprint-file $(SPRINT_FILE) \
--roadmap-file $(ROADMAP_FILE) \
--issue-file $(ISSUE_FILE) \
--output-dir $(OUTPUT_DIR) \
--include-status "In Progress" \
Expand Down
95 changes: 52 additions & 43 deletions analytics/src/analytics/cli.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
# pylint: disable=C0415
"""Expose a series of CLI entrypoints for the analytics package."""

import logging
import logging.config
from datetime import datetime
from pathlib import Path
from typing import Annotated, Optional

import typer
from slack_sdk import WebClient
from sqlalchemy import text

from analytics.datasets.deliverable_tasks import DeliverableTasks
from analytics.datasets.etl_dataset import EtlDataset
from analytics.datasets.issues import GitHubIssues
from analytics.etl.github import GitHubProjectConfig, GitHubProjectETL
from analytics.etl.utils import load_config
from analytics.integrations import db, github, slack
from analytics.integrations import db, etldb, slack
from analytics.metrics.base import BaseMetric, Unit
from analytics.metrics.burndown import SprintBurndown
from analytics.metrics.burnup import SprintBurnup
Expand All @@ -25,23 +27,20 @@
# fmt: off
# Instantiate typer options with help text for the commands below
CONFIG_FILE_ARG = typer.Option(help="Path to JSON file with configurations for this entrypoint")
SPRINT_FILE_ARG = typer.Option(help="Path to file with exported sprint data")
ISSUE_FILE_ARG = typer.Option(help="Path to file with exported issue data")
ROADMAP_FILE_ARG = typer.Option(help="Path to file with exported roadmap data")
OUTPUT_FILE_ARG = typer.Option(help="Path to file where exported data will be saved")
OUTPUT_DIR_ARG = typer.Option(help="Path to directory where output files will be saved")
TMP_DIR_ARG = typer.Option(help="Path to directory where intermediate files will be saved")
OWNER_ARG = typer.Option(help="GitHub handle of the repo or project owner")
REPO_ARG = typer.Option(help="Name of the GitHub repo")
PROJECT_ARG = typer.Option(help="Number of the GitHub project")
FIELD_ARG = typer.Option(help="Name of the GitHub project field")
SPRINT_ARG = typer.Option(help="Name of the sprint for which we're calculating burndown")
UNIT_ARG = typer.Option(help="Whether to calculate completion by 'points' or 'tickets'")
OWNER_ARG = typer.Option(help="Name of the GitHub project owner, e.g. HHS")
PROJECT_ARG = typer.Option(help="Number of the GitHub project, e.g. 13")
SHOW_RESULTS_ARG = typer.Option(help="Display a chart of the results in a browser")
POST_RESULTS_ARG = typer.Option(help="Post the results to slack")
STATUS_ARG = typer.Option(
help="Deliverable status to include in report, can be passed multiple times",
)
EFFECTIVE_DATE_ARG = typer.Option(help="YYYY-MM-DD effective date to apply to each imported row")
# fmt: on

# instantiate the main CLI entrypoint
Expand All @@ -50,10 +49,12 @@
export_app = typer.Typer()
metrics_app = typer.Typer()
import_app = typer.Typer()
etl_app = typer.Typer()
# add sub-commands to main entrypoint
app.add_typer(export_app, name="export", help="Export data needed to calculate metrics")
app.add_typer(metrics_app, name="calculate", help="Calculate key project metrics")
app.add_typer(import_app, name="import", help="Import data into the database")
app.add_typer(etl_app, name="etl", help="Transform and load local file")


@app.callback()
Expand All @@ -66,26 +67,6 @@ def callback() -> None:
# ===========================================================


@export_app.command(name="gh_project_data")
def export_github_project_data(
    owner: Annotated[str, OWNER_ARG],
    project: Annotated[int, PROJECT_ARG],
    output_file: Annotated[str, OUTPUT_FILE_ARG],
) -> None:
    """Export data about items in a GitHub project and write it to an output file."""
    # Thin CLI wrapper: the three parsed options are forwarded positionally,
    # unchanged, to the GitHub integration, which performs the actual export.
    github.export_project_data(owner, project, output_file)


@export_app.command(name="gh_issue_data")
def export_github_issue_data(
    owner: Annotated[str, OWNER_ARG],
    repo: Annotated[str, REPO_ARG],
    output_file: Annotated[str, OUTPUT_FILE_ARG],
) -> None:
    """Export data about issues in a GitHub repo and write it to an output file."""
    # Thin CLI wrapper: the three parsed options are forwarded positionally,
    # unchanged, to the GitHub integration, which performs the actual export.
    github.export_issue_data(owner, repo, output_file)


@export_app.command(name="gh_delivery_data")
def export_github_data(
config_file: Annotated[str, CONFIG_FILE_ARG],
Expand Down Expand Up @@ -165,7 +146,6 @@ def calculate_sprint_burnup(

@metrics_app.command(name="deliverable_percent_complete")
def calculate_deliverable_percent_complete(
sprint_file: Annotated[str, SPRINT_FILE_ARG],
issue_file: Annotated[str, ISSUE_FILE_ARG],
# Typer uses the Unit enum to validate user inputs from the CLI
# but the default arg must be a string or the CLI will throw an error
Expand All @@ -174,23 +154,10 @@ def calculate_deliverable_percent_complete(
show_results: Annotated[bool, SHOW_RESULTS_ARG] = False,
post_results: Annotated[bool, POST_RESULTS_ARG] = False,
output_dir: Annotated[str, OUTPUT_DIR_ARG] = "data",
roadmap_file: Annotated[Optional[str], ROADMAP_FILE_ARG] = None, # noqa: UP007
include_status: Annotated[Optional[list[str]], STATUS_ARG] = None, # noqa: UP007
) -> None:
"""Calculate percentage completion by deliverable."""
if roadmap_file:
# load the input data using the new join path with roadmap data
task_data = DeliverableTasks.load_from_json_files_with_roadmap_data(
sprint_file=sprint_file,
issue_file=issue_file,
roadmap_file=roadmap_file,
)
else:
# load the input data using the original join path without roadmap data
task_data = DeliverableTasks.load_from_json_files(
sprint_file=sprint_file,
issue_file=issue_file,
)
task_data = GitHubIssues.from_json(issue_file)
# calculate percent complete
metric = DeliverablePercentComplete(
dataset=task_data,
Expand Down Expand Up @@ -279,3 +246,45 @@ def export_json_to_database(delivery_file: Annotated[str, ISSUE_FILE_ARG]) -> No
)
rows = len(issues.to_dict())
logger.info("Number of rows in table: %s", rows)


# ===========================================================
# Etl commands
# ===========================================================


@etl_app.command(name="initialize_database")
def initialize_database() -> None:
    """Initialize etl database."""
    # Progress is reported with plain print() calls (stdout) around the single
    # call that does the work.
    print("initializing database")
    # NOTE(review): presumably creates the ETL schema -- the Makefile's init-db
    # target describes this step as "Initializing the database schema"; confirm
    # against etldb.init_db.
    etldb.init_db()
    print("done")


@etl_app.command(name="transform_and_load")
def transform_and_load(
    issue_file: Annotated[str, ISSUE_FILE_ARG],
    effective_date: Annotated[str, EFFECTIVE_DATE_ARG],
) -> None:
    """Transform and load etl data."""
    # Validate the effective date before reading the input file or touching the
    # database. Round-tripping through strptime/strftime re-emits whatever
    # strptime accepted as a zero-padded YYYY-MM-DD string; .astimezone() only
    # attaches the local timezone and leaves the calendar date unchanged.
    dateformat = "%Y-%m-%d"
    try:
        datestamp = (
            datetime.strptime(effective_date, dateformat)
            .astimezone()
            .strftime(dateformat)
        )
    except ValueError as err:
        print("FATAL ERROR: malformed effective date, expected YYYY-MM-DD format")
        # Exit non-zero so callers (make, CI) see the failure; returning
        # normally would report success even though nothing was loaded.
        raise typer.Exit(code=1) from err
    print(f"running transform and load with effective date {datestamp}")

    # hydrate a dataset instance from the input data
    dataset = EtlDataset.load_from_json_file(file_path=issue_file)

    # sync data to db, stamping the rows with the validated effective date
    etldb.sync_db(dataset, datestamp)

    # finish
    print("transform and load is done")
Loading

0 comments on commit 082ab2b

Please sign in to comment.