diff --git a/api/src/db/migrations/env.py b/api/src/db/migrations/env.py index 4abeee6ca..5ef78cc40 100644 --- a/api/src/db/migrations/env.py +++ b/api/src/db/migrations/env.py @@ -48,6 +48,8 @@ def include_object( # We create foreign tables to an Oracle database, if we see those locally # just ignore them as they aren't something we want included in Alembic return False + if type_ == "table" and getattr(object, "schema", None) == Schemas.LEGACY: + return False else: return True diff --git a/api/src/db/migrations/run.py b/api/src/db/migrations/run.py index f4d16bb56..bea4bc568 100644 --- a/api/src/db/migrations/run.py +++ b/api/src/db/migrations/run.py @@ -3,6 +3,8 @@ # running on the production docker image from any directory. import logging import os +import time +from typing import Any import alembic.command as command import alembic.script as script @@ -21,6 +23,7 @@ def up(revision: str = "head") -> None: + enable_query_logging() command.upgrade(alembic_cfg, revision) # We want logging for the lookups, but alembic already sets @@ -30,13 +33,49 @@ def up(revision: str = "head") -> None: def down(revision: str = "-1") -> None: + enable_query_logging() command.downgrade(alembic_cfg, revision) def downall(revision: str = "base") -> None: + enable_query_logging() command.downgrade(alembic_cfg, revision) +def enable_query_logging() -> None: + """Log each migration query as it happens along with timing. + + Based on the example at https://docs.sqlalchemy.org/en/20/faq/performance.html#query-profiling + """ + + @sqlalchemy.event.listens_for(sqlalchemy.engine.Engine, "before_cursor_execute", retval=True) + def before_execute( + conn: sqlalchemy.Connection, + _cursor: Any, + statement: str, + _parameters: Any, + _context: Any, + _executemany: bool, + ) -> tuple[str, Any]: + conn.info.setdefault("query_start_time", []).append(time.monotonic()) + logger.info("before execute", extra={"migrate.sql": statement.strip()}) + return statement, _parameters + + @sqlalchemy.event.listens_for(sqlalchemy.engine.Engine, "after_cursor_execute") + def after_execute( + conn: sqlalchemy.Connection, + _cursor: Any, + statement: str, + _parameters: Any, + _context: Any, + _executemany: bool, + ) -> None: + total = int(1000 * (time.monotonic() - conn.info["query_start_time"].pop(-1))) + logger.info( + "after execute", extra={"migrate.sql": statement.strip(), "migrate.time_ms": total} + ) + + def have_all_migrations_run(db_engine: sqlalchemy.engine.Engine) -> None: directory = script.ScriptDirectory.from_config(alembic_cfg) with db_engine.begin() as connection: diff --git a/api/src/db/migrations/versions/2024_05_01_add_created_at_updated_at_and_deleted_.py b/api/src/db/migrations/versions/2024_05_01_add_created_at_updated_at_and_deleted_.py new file mode 100644 index 000000000..288d8b382 --- /dev/null +++ b/api/src/db/migrations/versions/2024_05_01_add_created_at_updated_at_and_deleted_.py @@ -0,0 +1,73 @@ +"""Add created_at, updated_at, and deleted_at columns to staging tables + +Revision ID: 1ddd1d051a99 +Revises: e3a1be603d26 +Create Date: 2024-05-01 11:14:34.332661 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "1ddd1d051a99" +down_revision = "e3a1be603d26" +branch_labels = None +depends_on = None + + +TABLES = ( + "tapplicanttypes_forecast", + "tapplicanttypes_forecast_hist", + "tapplicanttypes_synopsis", + "tapplicanttypes_synopsis_hist", + "tforecast", + "tforecast_hist", + "tfundactcat_forecast", + "tfundactcat_forecast_hist", + "tfundactcat_synopsis", + "tfundactcat_synopsis_hist", + "tfundinstr_forecast", + "tfundinstr_forecast_hist", + "tfundinstr_synopsis", + "tfundinstr_synopsis_hist", + "topportunity", + "topportunity_cfda", + "tsynopsis", + "tsynopsis_hist", +) + + +def upgrade(): + for table_name in TABLES: + op.add_column( + table_name, + sa.Column( + "created_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + schema="staging", + ) + op.add_column( + table_name, + sa.Column( + "updated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + schema="staging", + ) + op.add_column( + table_name, + sa.Column("deleted_at", sa.TIMESTAMP(timezone=True), nullable=True), + schema="staging", + ) + + +def downgrade(): + for table_name in TABLES: + op.drop_column(table_name, "deleted_at", schema="staging") + op.drop_column(table_name, "updated_at", schema="staging") + op.drop_column(table_name, "created_at", schema="staging") diff --git a/api/src/db/models/staging/staging_base.py b/api/src/db/models/staging/staging_base.py index a4b101f51..30d4f0c11 100644 --- a/api/src/db/models/staging/staging_base.py +++ b/api/src/db/models/staging/staging_base.py @@ -5,6 +5,7 @@ from sqlalchemy.orm import Mapped, declarative_mixin, mapped_column from src.constants.schema import Schemas +from src.util import datetime_util metadata = sqlalchemy.MetaData( naming_convention={ @@ -44,7 +45,30 @@ def __rich_repr__(self) -> Iterable[tuple[str, Any]]: return self._dict().items() +def same_as_created_at(context: Any) -> Any: + return context.get_current_parameters()["created_at"] + + @declarative_mixin class StagingParamMixin: is_deleted: Mapped[bool] transformed_at: Mapped[datetime.datetime | None] = mapped_column(index=True) + + created_at: Mapped[datetime.datetime] = mapped_column( + nullable=False, + default=datetime_util.utcnow, + server_default=sqlalchemy.sql.functions.now(), + ) + + updated_at: Mapped[datetime.datetime] = mapped_column( + nullable=False, + default=same_as_created_at, + onupdate=datetime_util.utcnow, + server_default=sqlalchemy.sql.functions.now(), + ) + + deleted_at: Mapped[datetime.datetime | None] = mapped_column( + nullable=True, + default=None, + server_default=None, + ) diff --git a/documentation/api/database/erds/full-schema.png b/documentation/api/database/erds/full-schema.png index 63cc23b7e..aeba01bf7 100644 Binary files a/documentation/api/database/erds/full-schema.png and b/documentation/api/database/erds/full-schema.png differ diff --git a/documentation/api/database/erds/staging-schema.png b/documentation/api/database/erds/staging-schema.png index a795744bd..2cbac09fd 100644 Binary files a/documentation/api/database/erds/staging-schema.png and b/documentation/api/database/erds/staging-schema.png differ