Enhance SQL Server adapter to support safe type expansion for column types

axellpadilla · axellpadilla · commit 59cf6de740a4 · 2025-09-29T23:12:49.000Z
- Implemented column type expansion logic in SQLServerAdapter.
- Added support for NVARCHAR and other type promotions in SQLServerColumn.
- Introduced tests for integer and numeric type promotions, as well as VARCHAR to NVARCHAR conversions.
diff --git a/dbt/adapters/sqlserver/sqlserver_adapter.py b/dbt/adapters/sqlserver/sqlserver_adapter.py
@@ -1,9 +1,12 @@
-from typing import Optional
+from typing import List, Optional
 
 import dbt.exceptions
 from dbt.adapters.base.impl import ConstraintSupport
+from dbt.adapters.cache import _make_ref_key_dict
+from dbt.adapters.events.types import ColTypeChange
 from dbt.adapters.fabric import FabricAdapter
 from dbt.contracts.graph.nodes import ConstraintType
+from dbt_common.events.functions import fire_event
 
 from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn
 from dbt.adapters.sqlserver.sqlserver_connections import SQLServerConnectionManager
@@ -65,3 +68,61 @@ def valid_incremental_strategies(self):
         Not used to validate custom strategies defined by end users.
         """
         return ["append", "delete+insert", "merge", "microbatch"]
+
+    def expand_column_types(self, goal, current):
+        """Widen columns of ``current`` so they match the wider types found on ``goal``.
+
+        Columns are paired by exact name. For each pair where the existing
+        column reports it can expand to the goal column, a ``ColTypeChange``
+        event is fired and ``alter_column_type`` is issued against ``current``.
+        The new type is always built from the goal column, so an NVARCHAR goal
+        yields NVARCHAR.
+        """
+        wanted_by_name = {col.name: col for col in self.get_columns_in_relation(goal)}
+
+        existing_by_name = {col.name: col for col in self.get_columns_in_relation(current)}
+
+        for name, wanted in wanted_by_name.items():
+            existing = existing_by_name.get(name)
+            if existing is None:
+                # Column only exists on the goal relation; nothing to alter.
+                continue
+
+            expandable = existing.can_expand_to(
+                wanted,
+                enable_safe_type_expansion=self.behavior.enable_safe_type_expansion,
+            )
+            if not expandable:
+                continue
+
+            # Strings go through the goal column's instance-level helper so the
+            # N-prefixed families keep their own type name; every other
+            # expansion uses the goal column's rendered data_type
+            # (eg. INT, DECIMAL(p,s), etc.).
+            new_type = (
+                wanted.string_type_instance(wanted.string_size())
+                if wanted.is_string()
+                else wanted.data_type
+            )
+
+            change_event = ColTypeChange(
+                orig_type=existing.data_type,
+                new_type=new_type,
+                table=_make_ref_key_dict(current),
+            )
+            fire_event(change_event)
+
+            self.alter_column_type(current, name, new_type)
+
+    @property
+    def _behavior_flags(self) -> List[dict]:
+        """Declare the behavior flags contributed by this adapter.
+
+        Only one flag is declared, ``enable_safe_type_expansion``, and it is
+        off by default. The flags are merged with project overrides by the
+        BaseAdapter.behavior machinery.
+        """
+        safe_type_expansion = {
+            "name": "enable_safe_type_expansion",
+            "default": False,
+            "source": "dbt-sqlserver",
+            "description": (
+                "Allow the SQL Server adapter to widen column types during schema-expansion. "
+                "This enables promotions like varchar->nvarchar, "
+                "  bit->tinyint->smallint->int->bigint, "
+                "and numeric(p,s)->numeric(p2,s2) using alter column."
+            ),
+            "docs_url": None,
+        }
+        return [safe_type_expansion]
diff --git a/dbt/adapters/sqlserver/sqlserver_column.py b/dbt/adapters/sqlserver/sqlserver_column.py
@@ -1,22 +1,154 @@
-from dbt.adapters.fabric import FabricColumn
+from typing import Any, ClassVar, Dict
 
+from dbt.adapters.base import Column
+from dbt_common.exceptions import DbtRuntimeError
+
+
+class SQLServerColumn(Column):
+    """Column implementation for SQL Server with NVARCHAR/NCHAR-aware type rendering."""
+
+    # Maps upper-case type names to the SQL Server type string used for them.
+    # NOTE(review): presumably consumed by the base Column's type translation -- confirm.
+    # NOTE(review): TINYINT is labelled SMALLINT and MONEY/SMALLMONEY are
+    # labelled DECIMAL -- confirm these mappings are intentional.
+    TYPE_LABELS: ClassVar[Dict[str, str]] = {
+        "STRING": "VARCHAR(8000)",
+        "VARCHAR": "VARCHAR(8000)",
+        "CHAR": "CHAR(1)",
+        "NCHAR": "NCHAR(1)",
+        "NVARCHAR": "NVARCHAR(4000)",
+        "TIMESTAMP": "DATETIME2(6)",
+        "DATETIME2": "DATETIME2(6)",
+        "DATETIME2(6)": "DATETIME2(6)",
+        "DATE": "DATE",
+        "TIME": "TIME(6)",
+        "FLOAT": "FLOAT",
+        "REAL": "REAL",
+        "INT": "INT",
+        "INTEGER": "INT",
+        "BIGINT": "BIGINT",
+        "SMALLINT": "SMALLINT",
+        "TINYINT": "SMALLINT",
+        "BIT": "BIT",
+        "BOOLEAN": "BIT",
+        "DECIMAL": "DECIMAL",
+        "NUMERIC": "NUMERIC",
+        "MONEY": "DECIMAL",
+        "SMALLMONEY": "DECIMAL",
+        "UNIQUEIDENTIFIER": "UNIQUEIDENTIFIER",
+        "VARBINARY": "VARBINARY(MAX)",
+        "BINARY": "BINARY(1)",
+    }
+
+    @classmethod
+    def string_type(cls, size: int) -> str:
+        """Render the generic, class-level string type for ``size``.
+
+        This is the variant used by SQLAdapter.expand_column_types. It has no
+        instance dtype to consult, so it always answers with VARCHAR, in line
+        with the rest of dbt where class-level string_type is not
+        instance-aware. A non-positive ``size`` falls back to 8000.
+        """
+        length = size if size > 0 else "8000"
+        return f"varchar({length})"
+
+    def string_type_instance(self, size: int) -> str:
+        """Render a string type for ``size`` that honours this column's own dtype.
+
+        ``nvarchar`` and ``nchar`` keep their type name and use ``size // 2``
+        as the length (n types use half the byte size for character count).
+        Every other dtype is rendered as ``varchar``.
+        """
+        family = (self.dtype or "").lower()
+        if family == "nvarchar":
+            length = size // 2 if size > 0 else "4000"
+        elif family == "nchar":
+            length = size // 2 if size > 1 else "1"
+        else:
+            # NOTE(review): a plain ``char`` column also lands here and is
+            # rendered as varchar -- confirm that is intended.
+            family = "varchar"
+            length = size if size > 0 else "8000"
+        return f"{family}({length})"
+
+    def literal(self, value: Any) -> str:
+        """Render ``value`` as a quoted SQL literal cast to this column's data type."""
+        # NOTE(review): ``value`` is interpolated as-is; a value containing a
+        # single quote is not escaped here -- confirm callers handle that.
+        return f"cast('{value}' as {self.data_type})"
+
+    @property
+    def data_type(self) -> str:
+        """Render the SQL Server type string for this column.
+
+        * ``datetime2`` is always rendered with precision 6.
+        * ``money`` / ``smallmoney`` are rendered bare, without arguments.
+        * String types go through ``string_type_instance``.
+        * Remaining numeric types go through ``numeric_type``.
+        * Anything else is returned as the raw ``dtype``.
+        """
+        dtype = self.dtype.lower()
+        # Always enforce datetime2 precision
+        if dtype == "datetime2":
+            return "datetime2(6)"
+        # is_numeric() counts money/smallmoney as numeric, but T-SQL takes no
+        # (precision, scale) arguments on them. The inherited numeric_type is
+        # expected to append those arguments when both are set, which would
+        # yield an invalid type such as "money(19,4)" in CAST / ALTER COLUMN.
+        if dtype in ("money", "smallmoney"):
+            return self.dtype
+        if self.is_string():
+            return self.string_type_instance(self.string_size())
+        if self.is_numeric():
+            return self.numeric_type(self.dtype, self.numeric_precision, self.numeric_scale)
+        return self.dtype
+
+    def is_string(self) -> bool:
+        """Report whether the dtype is one of varchar, char, nvarchar or nchar."""
+        string_dtypes = ("varchar", "char", "nvarchar", "nchar")
+        return self.dtype.lower() in string_dtypes
+
+    def is_number(self):
+        """Report whether the column is integer-like, numeric/decimal or floating point."""
+        return self.is_integer() or self.is_numeric() or self.is_float()
+
+    def is_float(self):
+        """Report whether the dtype is ``float`` or ``real``."""
+        float_dtypes = ("float", "real")
+        return self.dtype.lower() in float_dtypes
 
-class SQLServerColumn(FabricColumn):
     def is_integer(self) -> bool:
-        return self.dtype.lower() in [
-            # real types
-            "smallint",
-            "integer",
-            "bigint",
-            "smallserial",
-            "serial",
-            "bigserial",
-            # aliases
-            "int2",
-            "int4",
-            "int8",
-            "serial2",
-            "serial4",
-            "serial8",
-            "int",
-        ]
+        """Report whether this column belongs to the integer family.
+
+        BIT counts as integer-like so that it can take part in the
+        bit -> tinyint -> smallint -> int -> bigint promotion chain.
+        """
+        integer_like = ("int", "integer", "bigint", "smallint", "tinyint", "bit")
+        return self.dtype.lower() in integer_like
+
+    def is_numeric(self) -> bool:
+        """Report whether the dtype is numeric, decimal, money or smallmoney."""
+        exact_dtypes = ("numeric", "decimal", "money", "smallmoney")
+        return self.dtype.lower() in exact_dtypes
+
+    def string_size(self) -> int:
+        """Return ``char_size`` as an int, or 8000 when it is not set.
+
+        Raises:
+            DbtRuntimeError: if the column is not a string type.
+        """
+        if self.is_string():
+            return 8000 if self.char_size is None else int(self.char_size)
+        raise DbtRuntimeError("Called string_size() on non-string field!")
+
+    def can_expand_to(
+        self, other_column: Column, enable_safe_type_expansion: bool = False
+    ) -> bool:
+        """Return True when this column can safely be altered to ``other_column``'s type.
+
+        Same-dtype string growth (e.g. varchar(5) -> varchar(10)) is always
+        allowed. Every other promotion requires ``enable_safe_type_expansion``:
+        char/varchar -> nchar/nvarchar, integer-family promotions, integer ->
+        numeric, and numeric -> numeric widening.
+
+        Args:
+            other_column: The column whose type this column should grow into.
+            enable_safe_type_expansion: Adapter behavior flag gating every
+                promotion other than same-dtype string growth.
+
+        Returns:
+            True if the promotion is allowed, False otherwise.
+        """
+        self_dtype = self.dtype.lower()
+        other_dtype = other_column.dtype.lower()
+        if self.is_string() and other_column.is_string():
+            self_size = self.string_size()
+            other_size = other_column.string_size()
+
+            if other_size > self_size and self_dtype == other_dtype:
+                return True
+
+            # Cross-family conversion is one-way (CHAR/VARCHAR -> NCHAR/NVARCHAR)
+            # and gated by the flag. Shrinking conversions and
+            # NVARCHAR -> VARCHAR are never allowed.
+            if self_dtype in ("varchar", "char") and other_dtype in ("nvarchar", "nchar"):
+                # NOTE(review): the sizes are compared as-is. If char_size is a
+                # byte length for N-types (string_type_instance halves it), a
+                # varchar(10) -> nvarchar(5) change passes this check --
+                # confirm which unit char_size carries.
+                if other_size >= self_size and enable_safe_type_expansion:
+                    return True
+
+            # If none of the string rules matched, we can't expand.
+            return False
+
+        # At least one side is not a string. Integer/numeric promotions apply
+        # only when the adapter has enabled type expansion.
+        if not enable_safe_type_expansion or not self.is_number() or not other_column.is_number():
+            return False
+
+        # Integer family promotions (bit -> tinyint -> smallint -> int -> bigint)
+        int_family = ("bit", "tinyint", "smallint", "int", "bigint")
+        if self_dtype in int_family and other_dtype in int_family:
+            return int_family.index(other_dtype) > int_family.index(self_dtype)
+
+        # Precision/scale fall back to 0 when missing.
+        self_prec = int(self.numeric_precision or 0)
+        other_prec = int(other_column.numeric_precision or 0)
+        self_scale = int(self.numeric_scale or 0)
+        other_scale = int(other_column.numeric_scale or 0)
+        # Digits available to the left of the decimal point. A target can have
+        # a larger precision and still hold fewer whole digits when its scale
+        # grows faster, e.g. numeric(10,2) -> numeric(10,4) or
+        # int -> numeric(12,4); such a change can overflow existing values.
+        self_whole = self_prec - self_scale
+        other_whole = other_prec - other_scale
+
+        # Integer -> numeric/decimal: the target must be wider and keep room
+        # for every whole digit of the source.
+        if self.is_integer() and other_column.is_numeric():
+            return other_prec > self_prec and other_whole >= self_whole
+
+        # Numeric/Decimal promotions: precision, scale and whole-digit capacity
+        # must not shrink, and at least one of precision/scale must grow.
+        if self.is_numeric() and other_column.is_numeric():
+            if other_prec < self_prec or other_scale < self_scale or other_whole < self_whole:
+                return False
+            return other_prec > self_prec or other_scale > self_scale
+
+        return False
diff --git a/tests/functional/adapter/dbt/test_column_types.py b/tests/functional/adapter/dbt/test_column_types.py
@@ -88,7 +88,8 @@
     CAST(5.0 AS float) as double_col,
     CAST(6.0 AS numeric) as numeric_col,
     CAST(7 AS varchar(20)) as text_col,
-    CAST(8 AS varchar(20)) as varchar_col
+    CAST(8 AS varchar(20)) as varchar_col,
+    cast(9 as nvarchar(20)) as nvarchar_col
 """
 
 schema_yml = """
@@ -106,6 +107,7 @@
             numeric_col: ['numeric', 'number']
             text_col: ['string', 'not number']
             varchar_col: ['string', 'not number']
+            nvarchar_col: ['string', 'not number']
 """  # noqa
 
 
diff --git a/tests/functional/adapter/dbt/test_incremental.py b/tests/functional/adapter/dbt/test_incremental.py
@@ -3,10 +3,7 @@
 from dbt.tests.adapter.incremental.test_incremental_on_schema_change import (
     BaseIncrementalOnSchemaChange,
 )
-from dbt.tests.adapter.incremental.test_incremental_predicates import (
-    TestIncrementalPredicatesDeleteInsert,
-    TestPredicatesDeleteInsert,
-)
+from dbt.tests.util import run_dbt, write_file
 
 _MODELS__INCREMENTAL_IGNORE_SQLServer = """
 {{
@@ -92,9 +89,51 @@ def models(self):
         }
 
 
-class TestIncrementalPredicatesDeleteInsert(TestIncrementalPredicatesDeleteInsert):
-    pass
+# Incremental model used by the type-widening test below.
+# The non-incremental branch creates the table with smallint, varchar(5),
+# decimal(5,2) and money columns; the incremental branch selects int,
+# nvarchar(10), decimal(10,4) and decimal(20,4) values for the same columns.
+_INCREMENTAL__WIDEN_TYPES_SQLServer = """
+{{
+    config(
+        materialized='incremental',
+        unique_key='id',
+        on_schema_change='append_new_columns'
+    )
+}}
+
+{% if is_incremental() %}
+-- incremental branch: uses larger types and values that would fail if table types were not widened
+select
+  2 as id,
+  cast(40000 as int) as num_int,
+  cast('abcdef' as nvarchar(10)) as field1,
+  cast(100.25 as decimal(10,4)) as num_decimal,
+  cast(999999999999998.9999 as decimal(20,4)) as num_money
+{% else %}
+-- full-refresh branch: creates the table with smaller types
+select
+  1 as id,
+  cast(1 as smallint) as num_int,
+  cast('abc' as varchar(5)) as field1,
+  cast(10.5 as decimal(5,2)) as num_decimal,
+  cast(1240.14 as money) as num_money
+{% endif %}
+"""
+
+
+class TestIncrementalOnSchemaChangeExpands:
+    """Functional test: an incremental run widens column types when the flag is on."""
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        # Turn on the adapter behavior flag that gates cross-type promotions.
+        return {"flags": {"enable_safe_type_expansion": True}}
+
+    def test_run_incremental_widen_types(self, project):
+        """Full-refresh to create small types, then incremental to widen types."""
+        write_file(_INCREMENTAL__WIDEN_TYPES_SQLServer, "models", "incremental_change_widen.sql")
+
+        # Full-refresh to create table with smallint and varchar(5)
+        results = run_dbt(
+            ["run", "--models", "incremental_change_widen", "--full-refresh"]
+        )  # creates small types
+        assert len(results) == 1
 
+        # Run again to trigger incremental insert which requires widened types
+        # incremental branch inserts larger values
+        results = run_dbt(["run", "--models", "incremental_change_widen"])
+        # A pytest test must not return a value; assert on the outcome instead.
+        assert len(results) == 1
 
-class TestPredicatesDeleteInsert(TestPredicatesDeleteInsert):
-    pass
diff --git a/tests/unit/adapters/mssql/test_can_expand_to.py b/tests/unit/adapters/mssql/test_can_expand_to.py
@@ -0,0 +1,48 @@
+import pytest
+
+from dbt.adapters.sqlserver.sqlserver_column import SQLServerColumn
+
+
+def col_kwargs(dtype, char_size=None, numeric_precision=0, numeric_scale=0):
+    """Build the keyword arguments for a test column named ``c``."""
+    return dict(
+        column="c",
+        dtype=dtype,
+        char_size=char_size,
+        numeric_precision=numeric_precision,
+        numeric_scale=numeric_scale,
+    )
+
+
+@pytest.mark.parametrize(
+    "src_kwargs,tgt_kwargs,expect_with_flag,expect_without_flag",
+    [
+        # Integer family promotions are gated by the feature flag.
+        (col_kwargs("int"), col_kwargs("bigint"), True, False),
+        (col_kwargs("bit"), col_kwargs("tinyint"), True, False),
+        # So is widening an integer into a numeric.
+        (col_kwargs("int"), col_kwargs("numeric", numeric_precision=10), True, False),
+        (col_kwargs("bit"), col_kwargs("numeric", numeric_precision=5), True, False),
+        # numeric -> numeric is accepted (with the flag) when precision and
+        # scale both grow ...
+        (
+            col_kwargs("numeric", numeric_precision=10, numeric_scale=2),
+            col_kwargs("numeric", numeric_precision=12, numeric_scale=4),
+            True,
+            False,
+        ),
+        # ... and refused when the scale would shrink, flag or not.
+        (
+            col_kwargs("numeric", numeric_precision=10, numeric_scale=2),
+            col_kwargs("numeric", numeric_precision=12, numeric_scale=1),
+            False,
+            False,
+        ),
+        # String family change (VARCHAR -> NVARCHAR) needs the flag and
+        # sufficient capacity on the target.
+        (col_kwargs("varchar", char_size=10), col_kwargs("nvarchar", char_size=10), True, False),
+    ],
+)
+def test_can_expand_parametrized(src_kwargs, tgt_kwargs, expect_with_flag, expect_without_flag):
+    """Check ``can_expand_to`` for each case with the flag on, then off."""
+    source = SQLServerColumn(**src_kwargs)
+    target = SQLServerColumn(**tgt_kwargs)
+
+    for flag, expected in ((True, expect_with_flag), (False, expect_without_flag)):
+        assert source.can_expand_to(target, enable_safe_type_expansion=flag) is expected