From bab9feea8c595722a91f81ed86100e078ac48d63 Mon Sep 17 00:00:00 2001
From: Michael Chouinard <46358556+chouinar@users.noreply.github.com>
Date: Thu, 29 Feb 2024 15:37:27 -0500
Subject: [PATCH] [Issue #1279] Add tests for copy oracle data + a script for generating foreign tables (#1352)

## Summary
Fixes #1279

### Time to review: __5 mins__

## Changes proposed
Added a new script for generating foreign data tables; when run locally, it generates normal tables instead (for testing purposes)

Added tests for copy-oracle-data which use this new script

copy-oracle-data now catches, logs, and re-raises any exceptions so our monitoring can more easily detect issues

## Context for reviewers
This new `setup-foreign-tables` script is maybe a bit hackier than I'd like. The SQL for a normal table and a foreign table is similar, but there is just enough of a difference (notably, how primary keys are defined) that, rather than hard-coding the SQL, the script builds it from the column definitions.

It may be worth revisiting how we generate these tables at a later date as we add more of them, but we haven't yet settled on the transformation process, so we can punt that discussion for now.

## Additional information
Locally, it generates the table as you'd expect when running the command:

![Screenshot 2024-02-28 at 11 00 56 AM](https://github.com/HHS/simpler-grants-gov/assets/46358556/dd6f6534-5bf4-42ac-8593-f9801efbb857)

And the table created:

![Screenshot 2024-02-28 at 11 02 28 AM](https://github.com/HHS/simpler-grants-gov/assets/46358556/2d912687-dd75-4d82-a204-864dcf793220)
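If it helps with review, here is a small sketch of what `build_sql` produces in each mode. It mirrors the docstring example in `setup_foreign_tables.py`; the `EXAMPLE_TABLE` column list is purely illustrative:

```python
# Illustration only, based on the build_sql docstring in this PR.
from src.data_migration.setup_foreign_tables import Column, build_sql

columns = [
    Column("ID", "integer", is_nullable=False, is_primary_key=True),
    Column("DESCRIPTION", "text"),
]

# Local mode (IS_LOCAL_FOREIGN_TABLE=true) builds a regular table:
print(build_sql("EXAMPLE_TABLE", columns, is_local=True))
# CREATE TABLE IF NOT EXISTS foreign_example_table (ID integer CONSTRAINT EXAMPLE_TABLE_pkey PRIMARY KEY NOT NULL,DESCRIPTION text)

# Non-local mode builds a foreign table pointing at the Oracle server:
print(build_sql("EXAMPLE_TABLE", columns, is_local=False))
# CREATE FOREIGN TABLE IF NOT EXISTS foreign_example_table (ID integer OPTIONS (key 'true') NOT NULL,DESCRIPTION text) SERVER grants OPTIONS (schema 'EGRANTSADMIN', table 'EXAMPLE_TABLE')
```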
---
 api/Makefile                                   |   3 +
 api/local.env                                  |  11 +-
 api/src/data_migration/__init__.py             |   1 +
 api/src/data_migration/copy_oracle_data.py     |   8 +-
 .../data_migration/setup_foreign_tables.py     | 120 ++++++++++++++++++
 api/src/db/migrations/env.py                   |   4 +
 api/tests/src/data_migration/__init__.py       |   0
 .../data_migration/test_copy_oracle_data.py    |  99 +++++++++++++++
 .../test_setup_foreign_tables.py               |  80 ++++++++++++
 api/tests/src/db/models/factories.py           |  33 +++++
 10 files changed, 356 insertions(+), 3 deletions(-)
 create mode 100644 api/src/data_migration/setup_foreign_tables.py
 create mode 100644 api/tests/src/data_migration/__init__.py
 create mode 100644 api/tests/src/data_migration/test_copy_oracle_data.py
 create mode 100644 api/tests/src/data_migration/test_setup_foreign_tables.py

diff --git a/api/Makefile b/api/Makefile
index 4d344be89..172cf81f0 100644
--- a/api/Makefile
+++ b/api/Makefile
@@ -243,6 +243,9 @@ openapi-spec: init-db ## Generate OpenAPI spec
 copy-oracle-data:
 	$(FLASK_CMD) data-migration copy-oracle-data
 
+setup-foreign-tables:
+	$(FLASK_CMD) data-migration setup-foreign-tables
+
 ##################################################
 # Miscellaneous Utilities
 ##################################################
diff --git a/api/local.env b/api/local.env
index d9be8fbcd..9c0a19a87 100644
--- a/api/local.env
+++ b/api/local.env
@@ -86,4 +86,13 @@ ENABLE_OPPORTUNITY_LOG_MSG=false
 ############################
 # Endpoint Configuration
 ############################
-ENABLE_V_0_1_ENDPOINTS=true
\ No newline at end of file
+ENABLE_V_0_1_ENDPOINTS=true
+
+############################
+# Script Configuration
+############################
+
+# For the script to setup the foreign data tables
+# this env var overrides it so the script generates normal
+# tables that don't need to connect to an Oracle database
+IS_LOCAL_FOREIGN_TABLE=true
\ No newline at end of file
diff --git a/api/src/data_migration/__init__.py b/api/src/data_migration/__init__.py
index e6e4fb099..1e45d587b 100644
--- a/api/src/data_migration/__init__.py
+++ b/api/src/data_migration/__init__.py
@@ -2,5 +2,6 @@
 
 # import any of the other files so they get initialized and attached to the blueprint
 import src.data_migration.copy_oracle_data  # noqa: F401 E402 isort:skip
+import src.data_migration.setup_foreign_tables  # noqa: F401 E402 isort:skip
 
 __all__ = ["data_migration_blueprint"]
diff --git a/api/src/data_migration/copy_oracle_data.py b/api/src/data_migration/copy_oracle_data.py
index cee534e13..92578b214 100644
--- a/api/src/data_migration/copy_oracle_data.py
+++ b/api/src/data_migration/copy_oracle_data.py
@@ -52,8 +52,12 @@ class SqlCommands:
 def copy_oracle_data(db_session: db.Session) -> None:
     logger.info("Beginning copy of data from Oracle database")
 
-    with db_session.begin():
-        _run_copy_commands(db_session)
+    try:
+        with db_session.begin():
+            _run_copy_commands(db_session)
+    except Exception:
+        logger.exception("Failed to run copy-oracle-data command")
+        raise
 
     logger.info("Successfully ran copy-oracle-data")
 
diff --git a/api/src/data_migration/setup_foreign_tables.py b/api/src/data_migration/setup_foreign_tables.py
new file mode 100644
index 000000000..03c62fca6
--- /dev/null
+++ b/api/src/data_migration/setup_foreign_tables.py
@@ -0,0 +1,120 @@
+import logging
+from dataclasses import dataclass
+
+from pydantic import Field
+from sqlalchemy import text
+
+import src.adapters.db as db
+import src.adapters.db.flask_db as flask_db
+from src.data_migration.data_migration_blueprint import data_migration_blueprint
+from src.util.env_config import PydanticBaseEnvConfig
+
+logger = logging.getLogger(__name__)
+
+
+class ForeignTableConfig(PydanticBaseEnvConfig):
+    is_local_foreign_table: bool = Field(False)
+
+
+@dataclass
+class Column:
+    column_name: str
+    postgres_type: str
+
+    is_nullable: bool = True
+    is_primary_key: bool = False
+
+
+OPPORTUNITY_COLUMNS: list[Column] = [
+    Column("OPPORTUNITY_ID", "numeric(20)", is_nullable=False, is_primary_key=True),
+    Column("OPPNUMBER", "character varying (40)"),
+    Column("REVISION_NUMBER", "numeric(20)"),
+    Column("OPPTITLE", "character varying (255)"),
+    Column("OWNINGAGENCY", "character varying (255)"),
+    Column("PUBLISHERUID", "character varying (255)"),
+    Column("LISTED", "CHAR(1)"),
+    Column("OPPCATEGORY", "CHAR(1)"),
+    Column("INITIAL_OPPORTUNITY_ID", "numeric(20)"),
+    Column("MODIFIED_COMMENTS", "character varying (2000)"),
+    Column("CREATED_DATE", "DATE"),
+    Column("LAST_UPD_DATE", "DATE"),
+    Column("CREATOR_ID", "character varying (50)"),
+    Column("LAST_UPD_ID", "character varying (50)"),
+    Column("FLAG_2006", "CHAR(1)"),
+    Column("CATEGORY_EXPLANATION", "character varying (255)"),
+    Column("PUBLISHER_PROFILE_ID", "numeric(20)"),
+    Column("IS_DRAFT", "character varying (1)"),
+]
+
+
+@data_migration_blueprint.cli.command(
+    "setup-foreign-tables", help="Setup the foreign tables for connecting to the Oracle database"
+)
+@flask_db.with_db_session()
+def setup_foreign_tables(db_session: db.Session) -> None:
+    logger.info("Beginning setup of foreign Oracle tables")
+
+    config = ForeignTableConfig()
+
+    with db_session.begin():
+        _run_create_table_commands(db_session, config)
+
+    logger.info("Successfully ran setup-foreign-tables")
+
+
+def build_sql(table_name: str, columns: list[Column], is_local: bool) -> str:
+    """
+    Build the SQL for creating a possibly foreign data table. If running
+    with is_local, it instead creates a regular table.
+
+    Assume you have a table with two columns, an "ID" primary key column and a "DESCRIPTION" text column.
+    You would call this as::
+
+        build_sql("EXAMPLE_TABLE", [Column("ID", "integer", is_nullable=False, is_primary_key=True), Column("DESCRIPTION", "text")], is_local)
+
+    Depending on whether the is_local bool is true or false, you get two different outputs.
+
+    is_local is True::
+
+        CREATE TABLE IF NOT EXISTS foreign_example_table (ID integer CONSTRAINT EXAMPLE_TABLE_pkey PRIMARY KEY NOT NULL,DESCRIPTION text)
+
+    is_local is False::
+
+        CREATE FOREIGN TABLE IF NOT EXISTS foreign_example_table (ID integer OPTIONS (key 'true') NOT NULL,DESCRIPTION text) SERVER grants OPTIONS (schema 'EGRANTSADMIN', table 'EXAMPLE_TABLE')
+    """
+
+    column_sql_parts = []
+    for column in columns:
+        column_sql = f"{column.column_name} {column.postgres_type}"
+
+        # Primary keys are defined as constraints in a regular table
+        # and as options in a foreign data table
+        if column.is_primary_key and is_local:
+            column_sql += f" CONSTRAINT {table_name}_pkey PRIMARY KEY"
+        elif column.is_primary_key and not is_local:
+            column_sql += " OPTIONS (key 'true')"
+
+        if not column.is_nullable:
+            column_sql += " NOT NULL"
+
+        column_sql_parts.append(column_sql)
+
+    create_table_command = "CREATE FOREIGN TABLE IF NOT EXISTS"
+    if is_local:
+        # Don't make a foreign table if running locally
+        create_table_command = "CREATE TABLE IF NOT EXISTS"
+
+    create_command_suffix = (
+        f" SERVER grants OPTIONS (schema 'EGRANTSADMIN', table '{table_name}')"  # noqa: B907
+    )
+    if is_local:
+        # We don't need the foreign server options if we're running locally, so unset the suffix
+        create_command_suffix = ""
+
+    return f"{create_table_command} foreign_{table_name.lower()} ({','.join(column_sql_parts)}){create_command_suffix}"
+
+
+def _run_create_table_commands(db_session: db.Session, config: ForeignTableConfig) -> None:
+    db_session.execute(
+        text(build_sql("TOPPORTUNITY", OPPORTUNITY_COLUMNS, config.is_local_foreign_table))
+    )
diff --git a/api/src/db/migrations/env.py b/api/src/db/migrations/env.py
index 54a6800c9..1da37884a 100644
--- a/api/src/db/migrations/env.py
+++ b/api/src/db/migrations/env.py
@@ -38,6 +38,10 @@ def include_object(
 ) -> bool:
     if type_ == "schema" and getattr(object, "schema", None) is not None:
         return False
+    if type_ == "table" and name is not None and name.startswith("foreign_"):
+        # We create foreign tables that connect to an Oracle database; if we see those locally,
+        # just ignore them as they aren't something we want included in Alembic
+        return False
     else:
         return True
 
diff --git a/api/tests/src/data_migration/__init__.py b/api/tests/src/data_migration/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/api/tests/src/data_migration/test_copy_oracle_data.py b/api/tests/src/data_migration/test_copy_oracle_data.py
new file mode 100644
index 000000000..21d4cd57d
--- /dev/null
+++ b/api/tests/src/data_migration/test_copy_oracle_data.py
@@ -0,0 +1,99 @@
+from datetime import date
+
+import pytest
+from sqlalchemy import text
+
+from src.data_migration.copy_oracle_data import _run_copy_commands
+from src.data_migration.setup_foreign_tables import ForeignTableConfig, _run_create_table_commands
+from src.db.models.transfer.topportunity_models import TransferTopportunity
+from tests.src.db.models.factories import ForeignTopportunityFactory, TransferTopportunityFactory
+
+
+@pytest.fixture(autouse=True)
+def setup_foreign_tables(db_session):
+    _run_create_table_commands(db_session, ForeignTableConfig(is_local_foreign_table=True))
+
+
+@pytest.fixture(autouse=True, scope="function")
+def truncate_tables(db_session):
+    # Automatically delete all the data in the relevant tables before tests
+    db_session.execute(text("TRUNCATE TABLE foreign_topportunity"))
+    db_session.execute(text("TRUNCATE TABLE transfer_topportunity"))
+
+
+def convert_value_for_insert(value) -> str:
+    if value is None:
+        return "NULL"
+
+    if isinstance(value, int):
+        return str(value)
+    if isinstance(value, str):
+        return f"'{value}'"  # noqa: B907
+    if isinstance(value, date):
+        return f"'{value.isoformat()}'"  # noqa: B907
+
+    raise Exception("Type not configured for conversion")
+
+
+def build_foreign_opportunity(db_session, opp_params: dict):
+    opp = ForeignTopportunityFactory.build(**opp_params)
+
+    columns = opp.keys()
+    values = [convert_value_for_insert(opp[column]) for column in columns]
+
+    db_session.execute(
+        text(
+            f"INSERT INTO foreign_topportunity ({','.join(columns)}) VALUES ({','.join(values)})"  # nosec
+        )
+    )
+
+    return opp
+
+
+def test_copy_oracle_data_foreign_empty(db_session, enable_factory_create):
+    TransferTopportunityFactory.create_batch(size=5)
+    # The foreign table is empty, so this just truncates the transfer table
+    assert db_session.query(TransferTopportunity).count() == 5
+    _run_copy_commands(db_session)
+    assert db_session.query(TransferTopportunity).count() == 0
+
+
+def test_copy_oracle_data(db_session, enable_factory_create):
+    print(db_session.__class__.__name__)
+
+    # Create some records initially in the table that we'll wipe
+    TransferTopportunityFactory.create_batch(size=3)
+
+    foreign_records = [
+        build_foreign_opportunity(db_session, {}),
+        build_foreign_opportunity(db_session, {}),
+        build_foreign_opportunity(db_session, {}),
+        build_foreign_opportunity(db_session, {"oppnumber": "ABC-123-454-321-CBA"}),
+        build_foreign_opportunity(db_session, {"opportunity_id": 100}),
+    ]
+
+    # The copy script won't fetch anything with is_draft not equaling "N" so add one
+    build_foreign_opportunity(db_session, {"is_draft": "Y"})
+
+    _run_copy_commands(db_session)
+
+    copied_opportunities = db_session.query(TransferTopportunity).all()
+
+    assert len(copied_opportunities) == len(foreign_records)
+
+    copied_opportunities.sort(key=lambda opportunity: opportunity.opportunity_id)
+    foreign_records.sort(key=lambda opportunity: opportunity["opportunity_id"])
+
+    for copied_opportunity, foreign_record in zip(
+        copied_opportunities, foreign_records, strict=True
+    ):
+        assert copied_opportunity.opportunity_id == foreign_record["opportunity_id"]
+        assert copied_opportunity.oppnumber == foreign_record["oppnumber"]
+        assert copied_opportunity.opptitle == foreign_record["opptitle"]
+        assert copied_opportunity.owningagency == foreign_record["owningagency"]
+        assert copied_opportunity.oppcategory == foreign_record["oppcategory"]
+        assert copied_opportunity.category_explanation == foreign_record["category_explanation"]
+        assert copied_opportunity.is_draft == foreign_record["is_draft"]
+        assert copied_opportunity.revision_number == foreign_record["revision_number"]
+        assert copied_opportunity.last_upd_date == foreign_record["last_upd_date"]
+        assert copied_opportunity.created_date == foreign_record["created_date"]
diff --git a/api/tests/src/data_migration/test_setup_foreign_tables.py b/api/tests/src/data_migration/test_setup_foreign_tables.py
new file mode 100644
index 000000000..0a23b51e0
--- /dev/null
+++ b/api/tests/src/data_migration/test_setup_foreign_tables.py
@@ -0,0 +1,80 @@
+import pytest
+
+from src.data_migration.setup_foreign_tables import OPPORTUNITY_COLUMNS, Column, build_sql
+
+EXPECTED_LOCAL_OPPORTUNITY_SQL = (
+    "CREATE TABLE IF NOT EXISTS foreign_topportunity "
+    "(OPPORTUNITY_ID numeric(20) CONSTRAINT TOPPORTUNITY_pkey PRIMARY KEY NOT NULL,"
+    "OPPNUMBER character varying (40),"
+    "REVISION_NUMBER numeric(20),"
+    "OPPTITLE character varying (255),"
+    "OWNINGAGENCY character varying (255),"
+    "PUBLISHERUID character varying (255),"
+    "LISTED CHAR(1),"
+    "OPPCATEGORY CHAR(1),"
+    "INITIAL_OPPORTUNITY_ID numeric(20),"
+    "MODIFIED_COMMENTS character varying (2000),"
+    "CREATED_DATE DATE,"
+    "LAST_UPD_DATE DATE,"
+    "CREATOR_ID character varying (50),"
+    "LAST_UPD_ID character varying (50),"
+    "FLAG_2006 CHAR(1),"
+    "CATEGORY_EXPLANATION character varying (255),"
+    "PUBLISHER_PROFILE_ID numeric(20),"
+    "IS_DRAFT character varying (1))"
+)
+
+EXPECTED_NONLOCAL_OPPORTUNITY_SQL = (
+    "CREATE FOREIGN TABLE IF NOT EXISTS foreign_topportunity "
+    "(OPPORTUNITY_ID numeric(20) OPTIONS (key 'true') NOT NULL,"
+    "OPPNUMBER character varying (40),"
+    "REVISION_NUMBER numeric(20),"
+    "OPPTITLE character varying (255),"
+    "OWNINGAGENCY character varying (255),"
+    "PUBLISHERUID character varying (255),"
+    "LISTED CHAR(1),"
+    "OPPCATEGORY CHAR(1),"
+    "INITIAL_OPPORTUNITY_ID numeric(20),"
+    "MODIFIED_COMMENTS character varying (2000),"
+    "CREATED_DATE DATE,"
+    "LAST_UPD_DATE DATE,"
+    "CREATOR_ID character varying (50),"
+    "LAST_UPD_ID character varying (50),"
+    "FLAG_2006 CHAR(1),"
+    "CATEGORY_EXPLANATION character varying (255),"
+    "PUBLISHER_PROFILE_ID numeric(20),"
+    "IS_DRAFT character varying (1))"
+    " SERVER grants OPTIONS (schema 'EGRANTSADMIN', table 'TOPPORTUNITY')"
+)
+
+
+TEST_COLUMNS = [
+    Column("ID", "integer", is_nullable=False, is_primary_key=True),
+    Column("DESCRIPTION", "text"),
+]
+EXPECTED_LOCAL_TEST_SQL = (
+    "CREATE TABLE IF NOT EXISTS foreign_test_table "
+    "(ID integer CONSTRAINT TEST_TABLE_pkey PRIMARY KEY NOT NULL,"
+    "DESCRIPTION text)"
+)
+EXPECTED_NONLOCAL_TEST_SQL = (
+    "CREATE FOREIGN TABLE IF NOT EXISTS foreign_test_table "
+    "(ID integer OPTIONS (key 'true') NOT NULL,"
+    "DESCRIPTION text)"
+    " SERVER grants OPTIONS (schema 'EGRANTSADMIN', table 'TEST_TABLE')"
+)
+
+
+@pytest.mark.parametrize(
+    "table_name,columns,is_local,expected_sql",
+    [
+        ("TEST_TABLE", TEST_COLUMNS, True, EXPECTED_LOCAL_TEST_SQL),
+        ("TEST_TABLE", TEST_COLUMNS, False, EXPECTED_NONLOCAL_TEST_SQL),
+        ("TOPPORTUNITY", OPPORTUNITY_COLUMNS, True, EXPECTED_LOCAL_OPPORTUNITY_SQL),
+        ("TOPPORTUNITY", OPPORTUNITY_COLUMNS, False, EXPECTED_NONLOCAL_OPPORTUNITY_SQL),
+    ],
+)
+def test_build_sql(table_name, columns, is_local, expected_sql):
+    sql = build_sql(table_name, columns, is_local)
+
+    assert sql == expected_sql
diff --git a/api/tests/src/db/models/factories.py b/api/tests/src/db/models/factories.py
index 1a68f5c1e..6fd7a2050 100644
--- a/api/tests/src/db/models/factories.py
+++ b/api/tests/src/db/models/factories.py
@@ -277,3 +277,36 @@ class Meta:
 
     created_date = factory.LazyAttribute(lambda o: o.created_at.date())
     last_upd_date = factory.LazyAttribute(lambda o: o.updated_at.date())
+
+
+####################################
+# Foreign Table Factories
+####################################
+
+
+class ForeignTopportunityFactory(factory.DictFactory):
+    """
+    NOTE: This generates a dictionary - and does not connect to the database directly
+    """
+
+    opportunity_id = factory.Sequence(lambda n: n)
+
+    oppnumber = factory.Sequence(lambda n: f"F-ABC-{n}-XYZ-001")
+    opptitle = factory.LazyFunction(lambda: f"Research into {fake.job()} industry")
+
+    owningagency = factory.Iterator(["F-US-ABC", "F-US-XYZ", "F-US-123"])
+
+    oppcategory = factory.fuzzy.FuzzyChoice(OpportunityCategoryLegacy)
+    # only set the category explanation if category is Other
+    category_explanation = factory.Maybe(
+        decider=factory.LazyAttribute(lambda o: o.oppcategory == OpportunityCategoryLegacy.OTHER),
+        yes_declaration=factory.Sequence(lambda n: f"Category as chosen by order #{n * n - 1}"),
+        no_declaration=None,
+    )
+
+    is_draft = "N"  # Because we filter out drafts, just default these to "N" (not a draft)
+
+    revision_number = 0
+
+    created_date = factory.Faker("date_between", start_date="-10y", end_date="-5y")
+    last_upd_date = factory.Faker("date_between", start_date="-5y", end_date="today")