Skip to content

Commit 853f792

Browse files
authored
fix: avoid 403 from to_gbq when table has policyTags (#356)
* fix: avoid 403 from to_gbq when table has policyTags
* pin dependency versions in conda test session
* remove pyarrow and bqstorage API from conda session with min pandas
1 parent 81ab6b8 commit 853f792

File tree

9 files changed

+359
-256
lines changed

9 files changed

+359
-256
lines changed

ci/requirements-3.7-0.23.2.conda

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ codecov
22
coverage
33
fastavro
44
flake8
5+
numpy==1.14.5
56
google-cloud-bigquery==1.11.1
6-
google-cloud-bigquery-storage
77
pydata-google-auth
88
pytest
99
pytest-cov
10+
tqdm==4.23.0

docs/source/changelog.rst

+5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ Features
1313
client project. Specify the target table ID as ``project.dataset.table`` to
1414
use this feature. (:issue:`321`, :issue:`347`)
1515

16+
Bug fixes
17+
~~~~~~~~~
18+
19+
- Avoid 403 error from ``to_gbq`` when table has ``policyTags``. (:issue:`354`)
20+
1621
Dependencies
1722
~~~~~~~~~~~~
1823

pandas_gbq/features.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""Module for checking dependency versions and supported features."""
2+
3+
# https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md
4+
BIGQUERY_MINIMUM_VERSION = "1.11.1"
5+
BIGQUERY_CLIENT_INFO_VERSION = "1.12.0"
6+
BIGQUERY_BQSTORAGE_VERSION = "1.24.0"
7+
BIGQUERY_FROM_DATAFRAME_CSV_VERSION = "2.6.0"
8+
PANDAS_VERBOSITY_DEPRECATION_VERSION = "0.23.0"
9+
10+
11+
class Features:
    """Detect installed dependency versions and the features they enable.

    Installed versions are parsed lazily on first access and cached on the
    instance. Each ``*_has_*`` property compares the cached installed version
    against the corresponding module-level ``*_VERSION`` constant.
    """

    def __init__(self):
        # Parsed version objects; ``None`` until the matching property is
        # first read successfully.
        self._bigquery_installed_version = None
        self._pandas_installed_version = None

    @property
    def bigquery_installed_version(self):
        """Parsed version of the installed ``google-cloud-bigquery`` package.

        Raises:
            ImportError: If the installed version is older than
                ``BIGQUERY_MINIMUM_VERSION``. The check is repeated on every
                access until it passes.
        """
        if self._bigquery_installed_version is not None:
            return self._bigquery_installed_version

        import google.cloud.bigquery
        import pkg_resources

        installed_version = pkg_resources.parse_version(
            google.cloud.bigquery.__version__
        )
        bigquery_minimum_version = pkg_resources.parse_version(
            BIGQUERY_MINIMUM_VERSION
        )

        if installed_version < bigquery_minimum_version:
            raise ImportError(
                "pandas-gbq requires google-cloud-bigquery >= {0}, "
                "current version {1}".format(
                    bigquery_minimum_version, installed_version
                )
            )

        # Cache only after validation. Caching before the check would let a
        # second access return the unsupported version without raising.
        self._bigquery_installed_version = installed_version
        return self._bigquery_installed_version

    @property
    def bigquery_has_client_info(self):
        """Whether google-cloud-bigquery >= ``BIGQUERY_CLIENT_INFO_VERSION``."""
        import pkg_resources

        bigquery_client_info_version = pkg_resources.parse_version(
            BIGQUERY_CLIENT_INFO_VERSION
        )
        return self.bigquery_installed_version >= bigquery_client_info_version

    @property
    def bigquery_has_bqstorage(self):
        """Whether google-cloud-bigquery >= ``BIGQUERY_BQSTORAGE_VERSION``."""
        import pkg_resources

        bigquery_bqstorage_version = pkg_resources.parse_version(
            BIGQUERY_BQSTORAGE_VERSION
        )
        return self.bigquery_installed_version >= bigquery_bqstorage_version

    @property
    def bigquery_has_from_dataframe_with_csv(self):
        """Whether google-cloud-bigquery >=
        ``BIGQUERY_FROM_DATAFRAME_CSV_VERSION``.
        """
        import pkg_resources

        bigquery_from_dataframe_version = pkg_resources.parse_version(
            BIGQUERY_FROM_DATAFRAME_CSV_VERSION
        )
        return (
            self.bigquery_installed_version >= bigquery_from_dataframe_version
        )

    @property
    def pandas_installed_version(self):
        """Parsed version of the installed ``pandas`` package (cached)."""
        if self._pandas_installed_version is not None:
            return self._pandas_installed_version

        import pandas
        import pkg_resources

        self._pandas_installed_version = pkg_resources.parse_version(
            pandas.__version__
        )
        return self._pandas_installed_version

    @property
    def pandas_has_deprecated_verbose(self):
        """Whether pandas >= ``PANDAS_VERBOSITY_DEPRECATION_VERSION``."""
        import pkg_resources

        # Add check for Pandas version before showing deprecation warning.
        # https://github.com/pydata/pandas-gbq/issues/157
        pandas_verbosity_deprecation = pkg_resources.parse_version(
            PANDAS_VERBOSITY_DEPRECATION_VERSION
        )
        return self.pandas_installed_version >= pandas_verbosity_deprecation
95+
FEATURES = Features()

pandas_gbq/gbq.py

+20-80
Original file line numberDiff line numberDiff line change
@@ -16,100 +16,45 @@
1616

1717
from pandas_gbq.exceptions import AccessDenied
1818
from pandas_gbq.exceptions import PerformanceWarning
19+
from pandas_gbq import features
20+
from pandas_gbq.features import FEATURES
1921
import pandas_gbq.schema
2022
import pandas_gbq.timestamp
2123

2224

2325
logger = logging.getLogger(__name__)
2426

25-
BIGQUERY_INSTALLED_VERSION = None
26-
BIGQUERY_CLIENT_INFO_VERSION = "1.12.0"
27-
BIGQUERY_BQSTORAGE_VERSION = "1.24.0"
28-
HAS_CLIENT_INFO = False
29-
HAS_BQSTORAGE_SUPPORT = False
30-
3127
try:
3228
import tqdm # noqa
3329
except ImportError:
3430
tqdm = None
3531

3632

37-
def _check_google_client_version():
38-
global BIGQUERY_INSTALLED_VERSION, HAS_CLIENT_INFO, HAS_BQSTORAGE_SUPPORT, SHOW_VERBOSE_DEPRECATION
39-
40-
try:
41-
import pkg_resources
42-
43-
except ImportError:
44-
raise ImportError("Could not import pkg_resources (setuptools).")
45-
46-
# https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md
47-
bigquery_minimum_version = pkg_resources.parse_version("1.11.0")
48-
bigquery_client_info_version = pkg_resources.parse_version(
49-
BIGQUERY_CLIENT_INFO_VERSION
50-
)
51-
bigquery_bqstorage_version = pkg_resources.parse_version(
52-
BIGQUERY_BQSTORAGE_VERSION
53-
)
54-
BIGQUERY_INSTALLED_VERSION = pkg_resources.get_distribution(
55-
"google-cloud-bigquery"
56-
).parsed_version
57-
58-
HAS_CLIENT_INFO = (
59-
BIGQUERY_INSTALLED_VERSION >= bigquery_client_info_version
60-
)
61-
HAS_BQSTORAGE_SUPPORT = (
62-
BIGQUERY_INSTALLED_VERSION >= bigquery_bqstorage_version
63-
)
64-
65-
if BIGQUERY_INSTALLED_VERSION < bigquery_minimum_version:
66-
raise ImportError(
67-
"pandas-gbq requires google-cloud-bigquery >= {0}, "
68-
"current version {1}".format(
69-
bigquery_minimum_version, BIGQUERY_INSTALLED_VERSION
70-
)
71-
)
72-
73-
# Add check for Pandas version before showing deprecation warning.
74-
# https://github.com/pydata/pandas-gbq/issues/157
75-
pandas_installed_version = pkg_resources.get_distribution(
76-
"pandas"
77-
).parsed_version
78-
pandas_version_wo_verbosity = pkg_resources.parse_version("0.23.0")
79-
SHOW_VERBOSE_DEPRECATION = (
80-
pandas_installed_version >= pandas_version_wo_verbosity
81-
)
82-
83-
8433
def _test_google_api_imports():
34+
try:
35+
import pkg_resources # noqa
36+
except ImportError as ex:
37+
raise ImportError("pandas-gbq requires setuptools") from ex
8538

8639
try:
8740
import pydata_google_auth # noqa
8841
except ImportError as ex:
89-
raise ImportError(
90-
"pandas-gbq requires pydata-google-auth: {0}".format(ex)
91-
)
42+
raise ImportError("pandas-gbq requires pydata-google-auth") from ex
9243

9344
try:
9445
from google_auth_oauthlib.flow import InstalledAppFlow # noqa
9546
except ImportError as ex:
96-
raise ImportError(
97-
"pandas-gbq requires google-auth-oauthlib: {0}".format(ex)
98-
)
47+
raise ImportError("pandas-gbq requires google-auth-oauthlib") from ex
9948

10049
try:
10150
import google.auth # noqa
10251
except ImportError as ex:
103-
raise ImportError("pandas-gbq requires google-auth: {0}".format(ex))
52+
raise ImportError("pandas-gbq requires google-auth") from ex
10453

10554
try:
10655
from google.cloud import bigquery # noqa
10756
except ImportError as ex:
108-
raise ImportError(
109-
"pandas-gbq requires google-cloud-bigquery: {0}".format(ex)
110-
)
111-
112-
_check_google_client_version()
57+
raise ImportError("pandas-gbq requires google-cloud-bigquery") from ex
11358

11459

11560
class DatasetCreationError(ValueError):
@@ -416,7 +361,7 @@ def get_client(self):
416361
# In addition to new enough version of google-api-core, a new enough
417362
# version of google-cloud-bigquery is required to populate the
418363
# client_info.
419-
if HAS_CLIENT_INFO:
364+
if FEATURES.bigquery_has_client_info:
420365
return bigquery.Client(
421366
project=self.project_id,
422367
credentials=self.credentials,
@@ -550,14 +495,15 @@ def _download_results(
550495
if user_dtypes is None:
551496
user_dtypes = {}
552497

553-
if self.use_bqstorage_api and not HAS_BQSTORAGE_SUPPORT:
498+
if self.use_bqstorage_api and not FEATURES.bigquery_has_bqstorage:
554499
warnings.warn(
555500
(
556501
"use_bqstorage_api was set, but have google-cloud-bigquery "
557502
"version {}. Requires google-cloud-bigquery version "
558503
"{} or later."
559504
).format(
560-
BIGQUERY_INSTALLED_VERSION, BIGQUERY_BQSTORAGE_VERSION
505+
FEATURES.bigquery_installed_version,
506+
features.BIGQUERY_BQSTORAGE_VERSION,
561507
),
562508
PerformanceWarning,
563509
stacklevel=4,
@@ -568,7 +514,7 @@ def _download_results(
568514
create_bqstorage_client = False
569515

570516
to_dataframe_kwargs = {}
571-
if HAS_BQSTORAGE_SUPPORT:
517+
if FEATURES.bigquery_has_bqstorage:
572518
to_dataframe_kwargs[
573519
"create_bqstorage_client"
574520
] = create_bqstorage_client
@@ -880,7 +826,7 @@ def read_gbq(
880826

881827
_test_google_api_imports()
882828

883-
if verbose is not None and SHOW_VERBOSE_DEPRECATION:
829+
if verbose is not None and FEATURES.pandas_has_deprecated_verbose:
884830
warnings.warn(
885831
"verbose is deprecated and will be removed in "
886832
"a future version. Set logging level in order to vary "
@@ -1054,7 +1000,7 @@ def to_gbq(
10541000

10551001
_test_google_api_imports()
10561002

1057-
if verbose is not None and SHOW_VERBOSE_DEPRECATION:
1003+
if verbose is not None and FEATURES.pandas_has_deprecated_verbose:
10581004
warnings.warn(
10591005
"verbose is deprecated and will be removed in "
10601006
"a future version. Set logging level in order to vary "
@@ -1133,8 +1079,8 @@ def to_gbq(
11331079
"schema of the destination table."
11341080
)
11351081

1136-
# Update the local `table_schema` so mode matches.
1137-
# See: https://github.com/pydata/pandas-gbq/issues/315
1082+
# Update the local `table_schema` so mode (NULLABLE/REQUIRED)
1083+
# matches. See: https://github.com/pydata/pandas-gbq/issues/315
11381084
table_schema = pandas_gbq.schema.update_schema(
11391085
table_schema, original_schema
11401086
)
@@ -1252,7 +1198,6 @@ def create(self, table_id, schema):
12521198
dataframe.
12531199
"""
12541200
from google.cloud.bigquery import DatasetReference
1255-
from google.cloud.bigquery import SchemaField
12561201
from google.cloud.bigquery import Table
12571202
from google.cloud.bigquery import TableReference
12581203

@@ -1274,12 +1219,7 @@ def create(self, table_id, schema):
12741219
DatasetReference(self.project_id, self.dataset_id), table_id
12751220
)
12761221
table = Table(table_ref)
1277-
1278-
schema = pandas_gbq.schema.add_default_nullable_mode(schema)
1279-
1280-
table.schema = [
1281-
SchemaField.from_api_repr(field) for field in schema["fields"]
1282-
]
1222+
table.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)
12831223

12841224
try:
12851225
self.client.create_table(table)

0 commit comments

Comments
 (0)