Skip to content

Commit b46e955

Browse files
authored
BUG: support INT64 and other standard SQL aliases in to_gbq table_schema (#340)
1 parent f1995f8 commit b46e955

File tree

3 files changed

+54
-18
lines changed

3 files changed

+54
-18
lines changed

docs/source/changelog.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Bug fixes
1010
~~~~~~~~~
1111

1212
- Encode floating point values with greater precision. (:issue:`326`)
13+
- Support ``INT64`` and other standard SQL aliases in
14+
:func:`~pandas_gbq.to_gbq` ``table_schema`` argument. (:issue:`322`)
1315

1416

1517
.. _changelog-0.14.0:

pandas_gbq/schema.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,16 @@
33
import copy
44

55

6+
# API may return data types as legacy SQL, so maintain a mapping of aliases
7+
# from standard SQL to legacy data types.
8+
_TYPE_ALIASES = {
9+
"BOOL": "BOOLEAN",
10+
"FLOAT64": "FLOAT",
11+
"INT64": "INTEGER",
12+
"STRUCT": "RECORD",
13+
}
14+
15+
616
def to_pandas_gbq(client_schema):
717
"""Given a sequence of :class:`google.cloud.bigquery.schema.SchemaField`,
818
return a schema in pandas-gbq API format.
@@ -24,10 +34,12 @@ def _clean_schema_fields(fields):
2434
are not generated by :func:`pandas_gbq.schema.generate_bq_schema`.
2535
"""
2636
fields_sorted = sorted(fields, key=lambda field: field["name"])
27-
return [
28-
{"name": field["name"], "type": field["type"]}
29-
for field in fields_sorted
30-
]
37+
clean_schema = []
38+
for field in fields_sorted:
39+
field_type = field["type"].upper()
40+
field_type = _TYPE_ALIASES.get(field_type, field_type)
41+
clean_schema.append({"name": field["name"], "type": field_type})
42+
return clean_schema
3143

3244

3345
def schema_is_subset(schema_remote, schema_local):

tests/unit/test_schema.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,44 @@ def module_under_test():
1111
return pandas_gbq.schema
1212

1313

14-
def test_schema_is_subset_passes_if_subset(module_under_test):
14+
@pytest.mark.parametrize(
15+
"original_fields,dataframe_fields",
16+
[
17+
(
18+
[
19+
{"name": "A", "type": "FLOAT"},
20+
{"name": "B", "type": "FLOAT64"},
21+
{"name": "C", "type": "STRING"},
22+
],
23+
[
24+
{"name": "A", "type": "FLOAT64"},
25+
{"name": "B", "type": "FLOAT"},
26+
],
27+
),
28+
# Original schema from API may contain legacy SQL datatype names.
29+
# https://github.com/pydata/pandas-gbq/issues/322
30+
(
31+
[{"name": "A", "type": "INTEGER"}],
32+
[{"name": "A", "type": "INT64"}],
33+
),
34+
(
35+
[{"name": "A", "type": "BOOL"}],
36+
[{"name": "A", "type": "BOOLEAN"}],
37+
),
38+
(
39+
# TODO: include sub-fields when struct uploads are supported.
40+
[{"name": "A", "type": "STRUCT"}],
41+
[{"name": "A", "type": "RECORD"}],
42+
),
43+
],
44+
)
45+
def test_schema_is_subset_passes_if_subset(
46+
module_under_test, original_fields, dataframe_fields
47+
):
1548
# Issue #24 schema_is_subset indicates whether the schema of the
1649
# dataframe is a subset of the schema of the bigquery table
17-
table_schema = {
18-
"fields": [
19-
{"name": "A", "type": "FLOAT"},
20-
{"name": "B", "type": "FLOAT"},
21-
{"name": "C", "type": "STRING"},
22-
]
23-
}
24-
tested_schema = {
25-
"fields": [
26-
{"name": "A", "type": "FLOAT"},
27-
{"name": "B", "type": "FLOAT"},
28-
]
29-
}
50+
table_schema = {"fields": original_fields}
51+
tested_schema = {"fields": dataframe_fields}
3052
assert module_under_test.schema_is_subset(table_schema, tested_schema)
3153

3254

0 commit comments

Comments
 (0)