[Issue #2481] Add new GitHub export #2539

Merged · 22 commits · Oct 29, 2024
2db7d34
test: Enables slack integration tests to run with flag
widal001 Oct 17, 2024
a85a342
feat: Adds new analytics/dataset IssueMetadata
widal001 Oct 21, 2024
e3928c6
test: Adds tests for datasets/issues.py
widal001 Oct 21, 2024
faae26c
feat: Adds GitHubIssues as subtype of BaseDataset
widal001 Oct 21, 2024
cba3e7b
refactor: Makes github.py a sub-package
widal001 Oct 22, 2024
ebfbc8b
feat: Adds new export functions for GitHub data
widal001 Oct 24, 2024
5db64d8
feat: Adds BaseDataset.to_json()
widal001 Oct 24, 2024
dd6558d
fix: Handles validation errors
widal001 Oct 24, 2024
7de6c6f
fix: NaN values in output of BaseDataset.to_dict()
widal001 Oct 24, 2024
4254fab
feat: Adds a new `export gh_delivery_data` entry point
widal001 Oct 24, 2024
0b149d9
Merge branch 'main' into issue-2481-add-new-github-export
widal001 Oct 25, 2024
1f2ce9f
ci: Fix mypy failure
widal001 Oct 25, 2024
6dadaba
build(analytics): Adds jq to Dockerfile
widal001 Oct 25, 2024
8dd8ffa
feat(analytics): Adds `make delivery-data-export` command
widal001 Oct 25, 2024
5e3a832
Merge branch 'main' into issue-2481-add-new-github-export
widal001 Oct 25, 2024
5128b06
feat(analytics): Adds `BaseDataset.from_json()` method
widal001 Oct 25, 2024
3c2aa06
refactor(analytics): Updates export_json_to_database
widal001 Oct 25, 2024
553456b
docs(analytics): Added TODO for GitHub graphql API
widal001 Oct 25, 2024
c387e5c
Merge branch 'main' into issue-2481-add-new-github-export
widal001 Oct 28, 2024
90c6b03
feat: Adds `delivery-data-export` to `gh-data-export` target
widal001 Oct 28, 2024
3729a0f
Merge branch 'main' into issue-2481-add-new-github-export
widal001 Oct 28, 2024
9f8852a
fix(analytics): Extra space in Makefile variable
widal001 Oct 28, 2024
1 change: 1 addition & 0 deletions analytics/Dockerfile
@@ -19,6 +19,7 @@ RUN apt-get update \
libpq-dev \
postgresql \
wget \
jq \
# Reduce the image size by clear apt cached lists
# Complies with https://github.com/codacy/codacy-hadolint/blob/master/codacy-hadolint/docs/description/DL3009.md
&& rm -fr /var/lib/apt/lists/* \
22 changes: 17 additions & 5 deletions analytics/Makefile
@@ -10,7 +10,11 @@ OUTPUT_DIR ?= data
SPRINT_FILE ?= $(OUTPUT_DIR)/sprint-data.json
ROADMAP_FILE ?= $(OUTPUT_DIR)/roadmap-data.json
ISSUE_FILE ?= $(OUTPUT_DIR)/issue-data.json
DELIVERY_FILE ?= $(OUTPUT_DIR)/delivery-data.json
SPRINT ?= @current
# Names of the points and sprint fields in the GitHub project
POINTS_FIELD ?= Points
SPRINT_FIELD ?= Sprint
UNIT ?= points
ACTION ?= show-results
MIN_TEST_COVERAGE ?= 80
@@ -151,9 +155,7 @@ sprint-data-export:
gh-db-data-import:
@echo "=> Importing sprint data to the database"
@echo "====================================================="
$(POETRY) analytics import db_import \
--sprint-file $(SPRINT_FILE) \
--issue-file $(ISSUE_FILE)
$(POETRY) analytics import db_import --delivery-file $(DELIVERY_FILE)

roadmap-data-export:
@echo "=> Exporting project data from the product roadmap"
@@ -163,6 +165,17 @@ roadmap-data-export:
--project $(ROADMAP_PROJECT) \
--output-file $(ROADMAP_FILE)

delivery-data-export:
Review comment (Collaborator): Do we want this running in AWS?

Reply (Author): Not yet! I think I'll want to finish some subset of the following tickets:

Then I'll create a separate ticket for which commands to run in AWS and tag you on it @coilysiren

@echo "=> Exporting GitHub issue and sprint data for delivery metrics"
@echo "====================================================="
$(POETRY) analytics export gh_delivery_data \
--owner $(ORG) \
--sprint-project $(SPRINT_PROJECT) \
--roadmap-project $(ROADMAP_PROJECT) \
--output-file $(DELIVERY_FILE) \
--points-field "$(POINTS_FIELD)" \
--sprint-field "$(SPRINT_FIELD)"

issue-data-export:
@echo "=> Exporting issue data from the repository"
@echo "====================================================="
@@ -171,7 +184,7 @@ issue-data-export:
--repo $(REPO) \
--output-file $(ISSUE_FILE)

gh-data-export: sprint-data-export issue-data-export roadmap-data-export
gh-data-export: sprint-data-export issue-data-export roadmap-data-export delivery-data-export

sprint-burndown:
@echo "=> Running sprint burndown report"
@@ -200,4 +213,3 @@ percent-complete:
sprint-reports: sprint-burndown percent-complete

sprint-reports-with-latest-data: gh-data-export sprint-reports

1 change: 1 addition & 0 deletions analytics/pyproject.toml
@@ -51,6 +51,7 @@ disable = [
"R0913", # too-many-arguments
"R0902", # too-many-instance-attributes
"R0903", # too-few-public-methods
"W1514", # unspecified-encoding
]

[tool.ruff]
153 changes: 104 additions & 49 deletions analytics/src/analytics/cli.py
@@ -1,6 +1,7 @@
# pylint: disable=C0415
"""Expose a series of CLI entrypoints for the analytics package."""
import logging
import logging.config
from pathlib import Path
from typing import Annotated, Optional

@@ -9,6 +10,7 @@
from sqlalchemy import text

from analytics.datasets.deliverable_tasks import DeliverableTasks
from analytics.datasets.issues import GitHubIssues
from analytics.datasets.sprint_board import SprintBoard
from analytics.integrations import db, github, slack
from analytics.metrics.base import BaseMetric, Unit
@@ -26,9 +28,11 @@
ROADMAP_FILE_ARG = typer.Option(help="Path to file with exported roadmap data")
OUTPUT_FILE_ARG = typer.Option(help="Path to file where exported data will be saved")
OUTPUT_DIR_ARG = typer.Option(help="Path to directory where output files will be saved")
TMP_DIR_ARG = typer.Option(help="Path to directory where intermediate files will be saved")
OWNER_ARG = typer.Option(help="GitHub handle of the repo or project owner")
REPO_ARG = typer.Option(help="Name of the GitHub repo")
PROJECT_ARG = typer.Option(help="Number of the GitHub project")
FIELD_ARG = typer.Option(help="Name of the GitHub project field")
SPRINT_ARG = typer.Option(help="Name of the sprint for which we're calculating burndown")
UNIT_ARG = typer.Option(help="Whether to calculate completion by 'points' or 'tickets'")
SHOW_RESULTS_ARG = typer.Option(help="Display a chart of the results in a browser")
@@ -55,6 +59,11 @@ def callback() -> None:
"""Analyze data about the Simpler.Grants.gov project."""


# ===========================================================
# Export commands
# ===========================================================


@export_app.command(name="gh_project_data")
def export_github_project_data(
owner: Annotated[str, OWNER_ARG],
@@ -75,6 +84,53 @@ def export_github_issue_data(
github.export_issue_data(owner, repo, output_file)


@export_app.command(name="gh_delivery_data")
def export_github_data(
owner: Annotated[str, OWNER_ARG],
sprint_project: Annotated[int, PROJECT_ARG],
roadmap_project: Annotated[int, PROJECT_ARG],
output_file: Annotated[str, OUTPUT_FILE_ARG],
sprint_field: Annotated[str, FIELD_ARG] = "Sprint",
points_field: Annotated[str, FIELD_ARG] = "Points",
tmp_dir: Annotated[str, TMP_DIR_ARG] = "data",
) -> None:
"""Export and flatten metadata about GitHub issues used for delivery metrics."""
# Specify path to intermediate files
sprint_file = Path(tmp_dir) / "sprint-data.json"
roadmap_file = Path(tmp_dir) / "roadmap-data.json"

# Export sprint and roadmap data
logger.info("Exporting roadmap data")
github.export_roadmap_data(
owner=owner,
project=roadmap_project,
quad_field="Quad",
pillar_field="Pillar",
output_file=str(roadmap_file),
)
logger.info("Exporting sprint data")
github.export_sprint_data(
owner=owner,
project=sprint_project,
sprint_field=sprint_field,
points_field=points_field,
output_file=str(sprint_file),
)

# load and flatten data into GitHubIssues dataset
logger.info("Transforming exported data")
issues = GitHubIssues.load_from_json_files(
sprint_file=str(sprint_file),
roadmap_file=str(roadmap_file),
)
issues.to_json(output_file)


# ===========================================================
# Calculate commands
# ===========================================================


@metrics_app.command(name="sprint_burndown")
def calculate_sprint_burndown(
sprint_file: Annotated[str, SPRINT_FILE_ARG],
@@ -129,55 +185,6 @@ def calculate_sprint_burnup(
)


@import_app.command(name="test_connection")
def test_connection() -> None:
"""Test function that ensures the DB connection works."""
engine = db.get_db()
# connection method from sqlalchemy
connection = engine.connect()

# Test INSERT INTO action
result = connection.execute(
text(
"INSERT INTO audit_log (topic,timestamp, end_timestamp, user_id, details)"
"VALUES('test','2024-06-11 10:41:15','2024-06-11 10:54:15',87654,'test from command');",
),
)
# Test SELECT action
result = connection.execute(text("SELECT * FROM audit_log WHERE user_id=87654;"))
for row in result:
print(row)
# commits the transaction to the db
connection.commit()
result.close()


@import_app.command(name="db_import")
def export_json_to_database(
sprint_file: Annotated[str, SPRINT_FILE_ARG],
issue_file: Annotated[str, ISSUE_FILE_ARG],
) -> None:
"""Import JSON data to the database."""
logger.info("Beginning import")

# Get the database engine and establish a connection
engine = db.get_db()

# Load data from the sprint board
sprint_data = SprintBoard.load_from_json_files(
sprint_file=sprint_file,
issue_file=issue_file,
)

sprint_data.to_sql(
output_table="github_project_data",
engine=engine,
replace_table=True,
)
rows = len(sprint_data.to_dict())
logger.info("Number of rows in table: %s", rows)


@metrics_app.command(name="deliverable_percent_complete")
def calculate_deliverable_percent_complete(
sprint_file: Annotated[str, SPRINT_FILE_ARG],
@@ -246,3 +253,51 @@ def show_and_or_post_results(
channel_id=settings.reporting_channel_id,
output_dir=Path(output_dir),
)


# ===========================================================
# Import commands
# ===========================================================


@import_app.command(name="test_connection")
def test_connection() -> None:
"""Test function that ensures the DB connection works."""
engine = db.get_db()
# connection method from sqlalchemy
connection = engine.connect()

# Test INSERT INTO action
result = connection.execute(
text(
"INSERT INTO audit_log (topic,timestamp, end_timestamp, user_id, details)"
"VALUES('test','2024-06-11 10:41:15','2024-06-11 10:54:15',87654,'test from command');",
),
)
# Test SELECT action
result = connection.execute(text("SELECT * FROM audit_log WHERE user_id=87654;"))
for row in result:
print(row)
# commits the transaction to the db
connection.commit()
result.close()


@import_app.command(name="db_import")
def export_json_to_database(delivery_file: Annotated[str, ISSUE_FILE_ARG]) -> None:
"""Import JSON data to the database."""
logger.info("Beginning import")

# Get the database engine and establish a connection
engine = db.get_db()

# Load data from the sprint board
issues = GitHubIssues.from_json(delivery_file)

issues.to_sql(
output_table="github_project_data",
engine=engine,
replace_table=True,
)
rows = len(issues.to_dict())
logger.info("Number of rows in table: %s", rows)
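The rewritten `db_import` flow above — load a flattened JSON export, write it to the `github_project_data` table with `replace_table=True`, then log the row count — can be sketched as follows. This is a minimal illustration only: an in-memory SQLite engine and made-up records stand in for the project's Postgres connection and `GitHubIssues` dataset, and the column names here are hypothetical, not the project's schema.

```python
import pandas as pd
from sqlalchemy import create_engine

# In-memory SQLite stands in for the Postgres engine from db.get_db().
engine = create_engine("sqlite://")

# Made-up records shaped loosely like a flattened delivery-data export.
records = [
    {"issue_title": "Add export", "points": 2, "sprint_name": "Sprint 1"},
    {"issue_title": "Fix Makefile", "points": None, "sprint_name": "Sprint 1"},
]
df = pd.DataFrame(records)

# Mirrors BaseDataset.to_sql() with replace_table=True: drop and
# recreate the table on every import, so reruns are idempotent.
df.to_sql("github_project_data", engine, if_exists="replace", index=False)

# Row count, as logged at the end of db_import.
rows = pd.read_sql("SELECT COUNT(*) AS n FROM github_project_data", engine)["n"][0]
```

Replacing the table rather than appending keeps repeated exports from duplicating rows, at the cost of losing history between imports.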
15 changes: 14 additions & 1 deletion analytics/src/analytics/datasets/base.py
@@ -4,9 +4,12 @@
from pathlib import Path
from typing import Self

import numpy as np
import pandas as pd
from sqlalchemy import Engine

from analytics.datasets.utils import dump_to_json, load_json_file


class BaseDataset:
"""Base class for all datasets."""
@@ -25,6 +28,12 @@ def from_dict(cls, data: list[dict]) -> Self:
"""Load the dataset from a list of python dictionaries representing records."""
return cls(df=pd.DataFrame(data))

@classmethod
def from_json(cls, file_path: str | Path) -> Self:
"""Load the dataset from a JSON file."""
data = load_json_file(str(file_path))
return cls(df=pd.DataFrame(data))

def to_sql(
self,
output_table: str,
@@ -112,4 +121,8 @@ def to_csv(

def to_dict(self) -> list[dict]:
"""Export the dataset to a list of python dictionaries representing records."""
return self.df.to_dict(orient="records")
return self.df.replace([np.nan], [None], regex=False).to_dict(orient="records")

def to_json(self, output_file: str) -> None:
"""Dump dataset to JSON."""
return dump_to_json(output_file, self.to_dict())
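The NaN-to-None replacement added to `to_dict()` above matters for the new `to_json()`: `json.dumps` serializes a float NaN as the bare token `NaN`, which is not valid JSON per RFC 8259, while `None` becomes JSON `null`. A minimal illustration with hypothetical single-column data:

```python
import json

import numpy as np
import pandas as pd

# Hypothetical records; NaN appears whenever a field is unset on an issue.
df = pd.DataFrame([{"points": 2.0}, {"points": np.nan}])

# Without the replacement, json.dumps emits the non-standard token NaN,
# which strict JSON parsers reject.
raw = json.dumps(df.to_dict(orient="records"))

# With the replacement (as in to_dict() above), NaN becomes JSON null.
clean = json.dumps(df.replace([np.nan], [None]).to_dict(orient="records"))
```

Here `raw` contains `NaN` while `clean` is `[{"points": 2.0}, {"points": null}]`, which round-trips through any JSON parser.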