Skip to content

Commit

Permalink
[Issue #1279] Add tests for copy oracle data + a script for generatin…
Browse files Browse the repository at this point in the history
…g foreign tables (#1352)

## Summary
Fixes #1279

### Time to review: __5 mins__

## Changes proposed
Added a new script for generating foreign data tables which when run
locally instead generates normal tables (for testing purposes)

Added tests for copy-oracle-data which use this new script

copy-oracle-data catches, logs, and throws any exceptions so our
monitoring can more easily detect issues

## Context for reviewers
This new `setup-foreign-tables` script is maybe a bit hackier than I'd
like, but while generating a normal table and a foreign table are
similar, there is just enough of a difference (how foreign keys are
defined notably) that it required a bit of work to do and so rather than
defining the SQL itself, it builds the SQL from the column definitions.
It may be worth revisiting how we generate these tables at a later date
as we add more of them, but we haven't yet settled on the transformation
process so we can punt that discussion for now.

## Additional information
Locally, it generates the table as you'd expect when running:


![Screenshot 2024-02-28 at 11 00 56
AM](https://github.com/HHS/simpler-grants-gov/assets/46358556/dd6f6534-5bf4-42ac-8593-f9801efbb857)

And the table created:
![Screenshot 2024-02-28 at 11 02 28
AM](https://github.com/HHS/simpler-grants-gov/assets/46358556/2d912687-dd75-4d82-a204-864dcf793220)
  • Loading branch information
chouinar authored Feb 29, 2024
1 parent 9cca344 commit bab9fee
Show file tree
Hide file tree
Showing 10 changed files with 356 additions and 3 deletions.
3 changes: 3 additions & 0 deletions api/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ openapi-spec: init-db ## Generate OpenAPI spec
copy-oracle-data:
$(FLASK_CMD) data-migration copy-oracle-data

setup-foreign-tables:
$(FLASK_CMD) data-migration setup-foreign-tables

##################################################
# Miscellaneous Utilities
##################################################
Expand Down
11 changes: 10 additions & 1 deletion api/local.env
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,13 @@ ENABLE_OPPORTUNITY_LOG_MSG=false
############################
# Endpoint Configuration
############################
ENABLE_V_0_1_ENDPOINTS=true
ENABLE_V_0_1_ENDPOINTS=true

############################
# Script Configuration
############################

# For the script to setup the foreign data tables
# this env var overrides it so the script generates normal
# tables that don't need to connect to an Oracle database
IS_LOCAL_FOREIGN_TABLE=true
1 change: 1 addition & 0 deletions api/src/data_migration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@

# import any of the other files so they get initialized and attached to the blueprint
import src.data_migration.copy_oracle_data # noqa: F401 E402 isort:skip
import src.data_migration.setup_foreign_tables # noqa: F401 E402 isort:skip

__all__ = ["data_migration_blueprint"]
8 changes: 6 additions & 2 deletions api/src/data_migration/copy_oracle_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,12 @@ class SqlCommands:
def copy_oracle_data(db_session: db.Session) -> None:
logger.info("Beginning copy of data from Oracle database")

with db_session.begin():
_run_copy_commands(db_session)
try:
with db_session.begin():
_run_copy_commands(db_session)
except Exception:
logger.exception("Failed to run copy-oracle-data command")
raise

logger.info("Successfully ran copy-oracle-data")

Expand Down
120 changes: 120 additions & 0 deletions api/src/data_migration/setup_foreign_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import logging
from dataclasses import dataclass

from pydantic import Field
from sqlalchemy import text

import src.adapters.db as db
import src.adapters.db.flask_db as flask_db
from src.data_migration.data_migration_blueprint import data_migration_blueprint
from src.util.env_config import PydanticBaseEnvConfig

logger = logging.getLogger(__name__)


class ForeignTableConfig(PydanticBaseEnvConfig):
is_local_foreign_table: bool = Field(False)


@dataclass
class Column:
column_name: str
postgres_type: str

is_nullable: bool = True
is_primary_key: bool = False


OPPORTUNITY_COLUMNS: list[Column] = [
Column("OPPORTUNITY_ID", "numeric(20)", is_nullable=False, is_primary_key=True),
Column("OPPNUMBER", "character varying (40)"),
Column("REVISION_NUMBER", "numeric(20)"),
Column("OPPTITLE", "character varying (255)"),
Column("OWNINGAGENCY", "character varying (255)"),
Column("PUBLISHERUID", "character varying (255)"),
Column("LISTED", "CHAR(1)"),
Column("OPPCATEGORY", "CHAR(1)"),
Column("INITIAL_OPPORTUNITY_ID", "numeric(20)"),
Column("MODIFIED_COMMENTS", "character varying (2000)"),
Column("CREATED_DATE", "DATE"),
Column("LAST_UPD_DATE", "DATE"),
Column("CREATOR_ID", "character varying (50)"),
Column("LAST_UPD_ID", "character varying (50)"),
Column("FLAG_2006", "CHAR(1)"),
Column("CATEGORY_EXPLANATION", "character varying (255)"),
Column("PUBLISHER_PROFILE_ID", "numeric(20)"),
Column("IS_DRAFT", "character varying (1)"),
]


@data_migration_blueprint.cli.command(
"setup-foreign-tables", help="Setup the foreign tables for connecting to the Oracle database"
)
@flask_db.with_db_session()
def setup_foreign_tables(db_session: db.Session) -> None:
logger.info("Beginning setup of foreign Oracle tables")

config = ForeignTableConfig()

with db_session.begin():
_run_create_table_commands(db_session, config)

logger.info("Successfully ran setup-foreign-tables")


def build_sql(table_name: str, columns: list[Column], is_local: bool) -> str:
"""
Build the SQL for creating a possibly foreign data table. If running
with is_local, it instead creates a regular table.
Assume you have a table with two columns, an "ID" primary key column, and a "description" text column,
you would call this as::
build_sql("EXAMPLE_TABLE", [Column("ID", "integer", is_nullable=False, is_primary_key=True), Column("DESCRIPTION", "text")], is_local)
Depending on whether the is_local bool is true or false would give two different outputs.
is_local is True::
CREATE TABLE IF NOT EXISTS foreign_example_table (ID integer CONSTRAINT EXAMPLE_TABLE_pkey PRIMARY KEY NOT NULL,DESCRIPTION text)
is_local is False::
CREATE FOREIGN TABLE IF NOT EXISTS foreign_example_table (ID integer OPTIONS (key 'true') NOT NULL,DESCRIPTION text) SERVER grants OPTIONS (schema 'EGRANTSADMIN', table 'EXAMPLE_TABLE')
"""

column_sql_parts = []
for column in columns:
column_sql = f"{column.column_name} {column.postgres_type}"

# Primary keys are defined as constraints in a regular table
# and as options in a foreign data table
if column.is_primary_key and is_local:
column_sql += f" CONSTRAINT {table_name}_pkey PRIMARY KEY"
elif column.is_primary_key and not is_local:
column_sql += " OPTIONS (key 'true')"

if not column.is_nullable:
column_sql += " NOT NULL"

column_sql_parts.append(column_sql)

create_table_command = "CREATE FOREIGN TABLE IF NOT EXISTS"
if is_local:
# Don't make a foreign table if running locally
create_table_command = "CREATE TABLE IF NOT EXISTS"

create_command_suffix = (
f" SERVER grants OPTIONS (schema 'EGRANTSADMIN', table '{table_name}')" # noqa: B907
)
if is_local:
# We don't want the config at the end if we're running locally so unset it
create_command_suffix = ""

return f"{create_table_command} foreign_{table_name.lower()} ({','.join(column_sql_parts)}){create_command_suffix}"


def _run_create_table_commands(db_session: db.Session, config: ForeignTableConfig) -> None:
db_session.execute(
text(build_sql("TOPPORTUNITY", OPPORTUNITY_COLUMNS, config.is_local_foreign_table))
)
4 changes: 4 additions & 0 deletions api/src/db/migrations/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ def include_object(
) -> bool:
if type_ == "schema" and getattr(object, "schema", None) is not None:
return False
if type_ == "table" and name is not None and name.startswith("foreign_"):
# We create foreign tables to an Oracle database, if we see those locally
# just ignore them as they aren't something we want included in Alembic
return False
else:
return True

Expand Down
Empty file.
99 changes: 99 additions & 0 deletions api/tests/src/data_migration/test_copy_oracle_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from datetime import date

import pytest
from sqlalchemy import text

from src.data_migration.copy_oracle_data import _run_copy_commands
from src.data_migration.setup_foreign_tables import ForeignTableConfig, _run_create_table_commands
from src.db.models.transfer.topportunity_models import TransferTopportunity
from tests.src.db.models.factories import ForeignTopportunityFactory, TransferTopportunityFactory


@pytest.fixture(autouse=True)
def setup_foreign_tables(db_session):
_run_create_table_commands(db_session, ForeignTableConfig(is_local_foreign_table=True))


@pytest.fixture(autouse=True, scope="function")
def truncate_tables(db_session):
# Automatically delete all the data in the relevant tables before tests
db_session.execute(text("TRUNCATE TABLE foreign_topportunity"))
db_session.execute(text("TRUNCATE TABLE transfer_topportunity"))


def convert_value_for_insert(value) -> str:
if value is None:
return "NULL"

if isinstance(value, int):
return str(value)
if isinstance(value, str):
return f"'{value}'" # noqa: B907
if isinstance(value, date):
return f"'{value.isoformat()}'" # noqa: B907

raise Exception("Type not configured for conversion")


def build_foreign_opportunity(db_session, opp_params: dict):
opp = ForeignTopportunityFactory.build(**opp_params)

columns = opp.keys()
values = [convert_value_for_insert(opp[column]) for column in columns]

db_session.execute(
text(
f"INSERT INTO foreign_topportunity ({','.join(columns)}) VALUES ({','.join(values)})" # nosec
)
)

return opp


def test_copy_oracle_data_foreign_empty(db_session, enable_factory_create):
TransferTopportunityFactory.create_batch(size=5)
# The foreign table is empty, so this just truncates the transfer table
assert db_session.query(TransferTopportunity).count() == 5
_run_copy_commands(db_session)
assert db_session.query(TransferTopportunity).count() == 0


def test_copy_oracle_data(db_session, enable_factory_create):
print(db_session.__class__.__name__)

# Create some records initially in the table that we'll wipe
TransferTopportunityFactory.create_batch(size=3)

foreign_records = [
build_foreign_opportunity(db_session, {}),
build_foreign_opportunity(db_session, {}),
build_foreign_opportunity(db_session, {}),
build_foreign_opportunity(db_session, {"oppnumber": "ABC-123-454-321-CBA"}),
build_foreign_opportunity(db_session, {"opportunity_id": 100}),
]

# The copy script won't fetch anything with is_draft not equaling "N" so add one
build_foreign_opportunity(db_session, {"is_draft": "Y"})

_run_copy_commands(db_session)

copied_opportunities = db_session.query(TransferTopportunity).all()

assert len(copied_opportunities) == len(foreign_records)

copied_opportunities.sort(key=lambda opportunity: opportunity.opportunity_id)
foreign_records.sort(key=lambda opportunity: opportunity["opportunity_id"])

for copied_opportunity, foreign_record in zip(
copied_opportunities, foreign_records, strict=True
):
assert copied_opportunity.opportunity_id == foreign_record["opportunity_id"]
assert copied_opportunity.oppnumber == foreign_record["oppnumber"]
assert copied_opportunity.opptitle == foreign_record["opptitle"]
assert copied_opportunity.owningagency == foreign_record["owningagency"]
assert copied_opportunity.oppcategory == foreign_record["oppcategory"]
assert copied_opportunity.category_explanation == foreign_record["category_explanation"]
assert copied_opportunity.is_draft == foreign_record["is_draft"]
assert copied_opportunity.revision_number == foreign_record["revision_number"]
assert copied_opportunity.last_upd_date == foreign_record["last_upd_date"]
assert copied_opportunity.created_date == foreign_record["created_date"]
80 changes: 80 additions & 0 deletions api/tests/src/data_migration/test_setup_foreign_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import pytest

from src.data_migration.setup_foreign_tables import OPPORTUNITY_COLUMNS, Column, build_sql

EXPECTED_LOCAL_OPPORTUNITY_SQL = (
"CREATE TABLE IF NOT EXISTS foreign_topportunity "
"(OPPORTUNITY_ID numeric(20) CONSTRAINT TOPPORTUNITY_pkey PRIMARY KEY NOT NULL,"
"OPPNUMBER character varying (40),"
"REVISION_NUMBER numeric(20),"
"OPPTITLE character varying (255),"
"OWNINGAGENCY character varying (255),"
"PUBLISHERUID character varying (255),"
"LISTED CHAR(1),"
"OPPCATEGORY CHAR(1),"
"INITIAL_OPPORTUNITY_ID numeric(20),"
"MODIFIED_COMMENTS character varying (2000),"
"CREATED_DATE DATE,"
"LAST_UPD_DATE DATE,"
"CREATOR_ID character varying (50),"
"LAST_UPD_ID character varying (50),"
"FLAG_2006 CHAR(1),"
"CATEGORY_EXPLANATION character varying (255),"
"PUBLISHER_PROFILE_ID numeric(20),"
"IS_DRAFT character varying (1))"
)

EXPECTED_NONLOCAL_OPPORTUNITY_SQL = (
"CREATE FOREIGN TABLE IF NOT EXISTS foreign_topportunity "
"(OPPORTUNITY_ID numeric(20) OPTIONS (key 'true') NOT NULL,"
"OPPNUMBER character varying (40),"
"REVISION_NUMBER numeric(20),"
"OPPTITLE character varying (255),"
"OWNINGAGENCY character varying (255),"
"PUBLISHERUID character varying (255),"
"LISTED CHAR(1),"
"OPPCATEGORY CHAR(1),"
"INITIAL_OPPORTUNITY_ID numeric(20),"
"MODIFIED_COMMENTS character varying (2000),"
"CREATED_DATE DATE,"
"LAST_UPD_DATE DATE,"
"CREATOR_ID character varying (50),"
"LAST_UPD_ID character varying (50),"
"FLAG_2006 CHAR(1),"
"CATEGORY_EXPLANATION character varying (255),"
"PUBLISHER_PROFILE_ID numeric(20),"
"IS_DRAFT character varying (1))"
" SERVER grants OPTIONS (schema 'EGRANTSADMIN', table 'TOPPORTUNITY')"
)


TEST_COLUMNS = [
Column("ID", "integer", is_nullable=False, is_primary_key=True),
Column("DESCRIPTION", "text"),
]
EXPECTED_LOCAL_TEST_SQL = (
"CREATE TABLE IF NOT EXISTS foreign_test_table "
"(ID integer CONSTRAINT TEST_TABLE_pkey PRIMARY KEY NOT NULL,"
"DESCRIPTION text)"
)
EXPECTED_NONLOCAL_TEST_SQL = (
"CREATE FOREIGN TABLE IF NOT EXISTS foreign_test_table "
"(ID integer OPTIONS (key 'true') NOT NULL,"
"DESCRIPTION text)"
" SERVER grants OPTIONS (schema 'EGRANTSADMIN', table 'TEST_TABLE')"
)


@pytest.mark.parametrize(
"table_name,columns,is_local,expected_sql",
[
("TEST_TABLE", TEST_COLUMNS, True, EXPECTED_LOCAL_TEST_SQL),
("TEST_TABLE", TEST_COLUMNS, False, EXPECTED_NONLOCAL_TEST_SQL),
("TOPPORTUNITY", OPPORTUNITY_COLUMNS, True, EXPECTED_LOCAL_OPPORTUNITY_SQL),
("TOPPORTUNITY", OPPORTUNITY_COLUMNS, False, EXPECTED_NONLOCAL_OPPORTUNITY_SQL),
],
)
def test_build_sql(table_name, columns, is_local, expected_sql):
sql = build_sql(table_name, columns, is_local)

assert sql == expected_sql
33 changes: 33 additions & 0 deletions api/tests/src/db/models/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,36 @@ class Meta:

created_date = factory.LazyAttribute(lambda o: o.created_at.date())
last_upd_date = factory.LazyAttribute(lambda o: o.updated_at.date())


####################################
# Foreign Table Factories
####################################


class ForeignTopportunityFactory(factory.DictFactory):
"""
NOTE: This generates a dictionary - and does not connect to the database directly
"""

opportunity_id = factory.Sequence(lambda n: n)

oppnumber = factory.Sequence(lambda n: f"F-ABC-{n}-XYZ-001")
opptitle = factory.LazyFunction(lambda: f"Research into {fake.job()} industry")

owningagency = factory.Iterator(["F-US-ABC", "F-US-XYZ", "F-US-123"])

oppcategory = factory.fuzzy.FuzzyChoice(OpportunityCategoryLegacy)
# only set the category explanation if category is Other
category_explanation = factory.Maybe(
decider=factory.LazyAttribute(lambda o: o.oppcategory == OpportunityCategoryLegacy.OTHER),
yes_declaration=factory.Sequence(lambda n: f"Category as chosen by order #{n * n - 1}"),
no_declaration=None,
)

is_draft = "N" # Because we filter out drafts, just default these to False

revision_number = 0

created_date = factory.Faker("date_between", start_date="-10y", end_date="-5y")
last_upd_date = factory.Faker("date_between", start_date="-5y", end_date="today")

0 comments on commit bab9fee

Please sign in to comment.