Skip to content

Commit 7330463

Browse files
committed
ENH: Update to_gbq and read_gbq to pandas-gbq 0.5.0
Add link to Pandas-GBQ 0.5.0 in the what's new notes. Remove an unnecessary sleep in the GBQ tests. Closes googleapis/python-bigquery-pandas#177. Closes #21627.
1 parent 36422a8 commit 7330463

File tree

4 files changed: +92 −73 lines changed

Diff for: doc/source/whatsnew/v0.24.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ Other Enhancements
1919
- :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`)
2020
- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`)
2121
- Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`)
22+
- :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to
23+
reflect changes from the `Pandas-GBQ library version 0.5.0
24+
<https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-5-0>`__.
25+
(:issue:`21627`)
26+
2227

2328
.. _whatsnew_0240.api_breaking:
2429

Diff for: pandas/core/frame.py

+32-27
Original file line numberDiff line numberDiff line change
@@ -1102,37 +1102,27 @@ def to_dict(self, orient='dict', into=dict):
11021102
else:
11031103
raise ValueError("orient '{o}' not understood".format(o=orient))
11041104

1105-
def to_gbq(self, destination_table, project_id, chunksize=None,
1106-
verbose=None, reauth=False, if_exists='fail', private_key=None,
1107-
auth_local_webserver=False, table_schema=None):
1105+
def to_gbq(self, destination_table, project_id=None, chunksize=None,
1106+
reauth=False, if_exists='fail', private_key=None,
1107+
auth_local_webserver=False, table_schema=None, location=None,
1108+
progress_bar=True, verbose=None):
11081109
"""
11091110
Write a DataFrame to a Google BigQuery table.
11101111
11111112
This function requires the `pandas-gbq package
11121113
<https://pandas-gbq.readthedocs.io>`__.
11131114
1114-
Authentication to the Google BigQuery service is via OAuth 2.0.
1115-
1116-
- If ``private_key`` is provided, the library loads the JSON service
1117-
account credentials and uses those to authenticate.
1118-
1119-
- If no ``private_key`` is provided, the library tries `application
1120-
default credentials`_.
1121-
1122-
.. _application default credentials:
1123-
https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application
1124-
1125-
- If application default credentials are not found or cannot be used
1126-
with BigQuery, the library authenticates with user account
1127-
credentials. In this case, you will be asked to grant permissions
1128-
for product name 'pandas GBQ'.
1115+
See the `How to authenticate with Google BigQuery
1116+
<https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
1117+
guide for authentication instructions.
11291118
11301119
Parameters
11311120
----------
11321121
destination_table : str
1133-
Name of table to be written, in the form 'dataset.tablename'.
1134-
project_id : str
1135-
Google BigQuery Account project ID.
1122+
Name of table to be written, in the form ``dataset.tablename``.
1123+
project_id : str, optional
1124+
Google BigQuery Account project ID. Optional when available from
1125+
the environment.
11361126
chunksize : int, optional
11371127
Number of rows to be inserted in each chunk from the dataframe.
11381128
Set to ``None`` to load the whole dataframe at once.
@@ -1170,8 +1160,21 @@ def to_gbq(self, destination_table, project_id, chunksize=None,
11701160
BigQuery API documentation on available names of a field.
11711161
11721162
*New in version 0.3.1 of pandas-gbq*.
1173-
verbose : boolean, deprecated
1174-
*Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module
1163+
location : str, optional
1164+
Location where the load job should run. See the `BigQuery locations
1165+
documentation
1166+
<https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
1167+
list of available locations. The location must match that of the
1168+
target dataset.
1169+
1170+
*New in version 0.5.0 of pandas-gbq*.
1171+
progress_bar : bool, default True
1172+
Use the library `tqdm` to show the progress bar for the upload,
1173+
chunk by chunk.
1174+
1175+
*New in version 0.5.0 of pandas-gbq*.
1176+
verbose : bool, deprecated
1177+
Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
11751178
to adjust verbosity instead
11761179
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
11771180
@@ -1182,10 +1185,12 @@ def to_gbq(self, destination_table, project_id, chunksize=None,
11821185
"""
11831186
from pandas.io import gbq
11841187
return gbq.to_gbq(
1185-
self, destination_table, project_id, chunksize=chunksize,
1186-
verbose=verbose, reauth=reauth, if_exists=if_exists,
1187-
private_key=private_key, auth_local_webserver=auth_local_webserver,
1188-
table_schema=table_schema)
1188+
self, destination_table, project_id=project_id,
1189+
chunksize=chunksize, reauth=reauth,
1190+
if_exists=if_exists, private_key=private_key,
1191+
auth_local_webserver=auth_local_webserver,
1192+
table_schema=table_schema, location=location,
1193+
progress_bar=progress_bar, verbose=verbose)
11891194

11901195
@classmethod
11911196
def from_records(cls, data, index=None, exclude=None, columns=None,

Diff for: pandas/io/gbq.py

+48-38
Original file line numberDiff line numberDiff line change
@@ -22,34 +22,26 @@ def _try_import():
2222

2323

2424
def read_gbq(query, project_id=None, index_col=None, col_order=None,
25-
reauth=False, verbose=None, private_key=None, dialect='legacy',
26-
**kwargs):
25+
reauth=False, private_key=None, auth_local_webserver=False,
26+
dialect='legacy', location=None, configuration=None,
27+
verbose=None):
2728
"""
2829
Load data from Google BigQuery.
2930
3031
This function requires the `pandas-gbq package
3132
<https://pandas-gbq.readthedocs.io>`__.
3233
33-
Authentication to the Google BigQuery service is via OAuth 2.0.
34-
35-
- If "private_key" is not provided:
36-
37-
By default "application default credentials" are used.
38-
39-
If default application credentials are not found or are restrictive,
40-
user account credentials are used. In this case, you will be asked to
41-
grant permissions for product name 'pandas GBQ'.
42-
43-
- If "private_key" is provided:
44-
45-
Service account credentials will be used to authenticate.
34+
See the `How to authenticate with Google BigQuery
35+
<https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
36+
guide for authentication instructions.
4637
4738
Parameters
4839
----------
4940
query : str
5041
SQL-Like Query to return data values.
51-
project_id : str
52-
Google BigQuery Account project ID.
42+
project_id : str, optional
43+
Google BigQuery Account project ID. Optional when available from
44+
the environment.
5345
index_col : str, optional
5446
Name of result column to use for index in results DataFrame.
5547
col_order : list(str), optional
@@ -62,6 +54,16 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
6254
Service account private key in JSON format. Can be file path
6355
or string contents. This is useful for remote server
6456
authentication (eg. Jupyter/IPython notebook on remote host).
57+
auth_local_webserver : boolean, default False
58+
Use the `local webserver flow`_ instead of the `console flow`_
59+
when getting user credentials.
60+
61+
.. _local webserver flow:
62+
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
63+
.. _console flow:
64+
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
65+
66+
*New in version 0.2.0 of pandas-gbq*.
6567
dialect : str, default 'legacy'
6668
SQL syntax dialect to use. Value can be one of:
6769
@@ -74,19 +76,26 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
7476
compliant with the SQL 2011 standard. For more information
7577
see `BigQuery Standard SQL Reference
7678
<https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
77-
verbose : boolean, deprecated
78-
*Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module
79-
to adjust verbosity instead
80-
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
81-
kwargs : dict
82-
Arbitrary keyword arguments.
83-
configuration (dict): query config parameters for job processing.
79+
location : str, optional
80+
Location where the query job should run. See the `BigQuery locations
81+
documentation
82+
<https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
83+
list of available locations. The location must match that of any
84+
datasets used in the query.
85+
86+
*New in version 0.5.0 of pandas-gbq*.
87+
configuration : dict, optional
88+
Query config parameters for job processing.
8489
For example:
8590
8691
configuration = {'query': {'useQueryCache': False}}
8792
88-
For more information see `BigQuery SQL Reference
89-
<https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__
93+
For more information see `BigQuery REST API Reference
94+
<https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
95+
verbose : None, deprecated
96+
Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
97+
to adjust verbosity instead
98+
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
9099
91100
Returns
92101
-------
@@ -100,20 +109,21 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
100109
"""
101110
pandas_gbq = _try_import()
102111
return pandas_gbq.read_gbq(
103-
query, project_id=project_id,
104-
index_col=index_col, col_order=col_order,
105-
reauth=reauth, verbose=verbose,
106-
private_key=private_key,
107-
dialect=dialect,
108-
**kwargs)
112+
query, project_id=project_id, index_col=index_col,
113+
col_order=col_order, reauth=reauth, verbose=verbose,
114+
private_key=private_key, auth_local_webserver=auth_local_webserver,
115+
dialect=dialect, location=location, configuration=configuration)
109116

110117

111-
def to_gbq(dataframe, destination_table, project_id, chunksize=None,
118+
def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
112119
verbose=None, reauth=False, if_exists='fail', private_key=None,
113-
auth_local_webserver=False, table_schema=None):
120+
auth_local_webserver=False, table_schema=None, location=None,
121+
progress_bar=True):
114122
pandas_gbq = _try_import()
115123
return pandas_gbq.to_gbq(
116-
dataframe, destination_table, project_id, chunksize=chunksize,
117-
verbose=verbose, reauth=reauth, if_exists=if_exists,
118-
private_key=private_key, auth_local_webserver=auth_local_webserver,
119-
table_schema=table_schema)
124+
dataframe, destination_table, project_id=project_id,
125+
chunksize=chunksize, verbose=verbose, reauth=reauth,
126+
if_exists=if_exists, private_key=private_key,
127+
auth_local_webserver=auth_local_webserver,
128+
table_schema=table_schema, location=location,
129+
progress_bar=progress_bar)

Diff for: pandas/tests/io/test_gbq.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from datetime import datetime
33
import pytz
44
import platform
5-
from time import sleep
65
import os
76

87
import numpy as np
@@ -48,16 +47,18 @@ def _in_travis_environment():
4847
def _get_project_id():
4948
if _in_travis_environment():
5049
return os.environ.get('GBQ_PROJECT_ID')
51-
else:
52-
return PROJECT_ID
50+
return PROJECT_ID or os.environ.get('GBQ_PROJECT_ID')
5351

5452

5553
def _get_private_key_path():
5654
if _in_travis_environment():
5755
return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci',
5856
'travis_gbq.json'])
59-
else:
60-
return PRIVATE_KEY_JSON_PATH
57+
58+
private_key_path = PRIVATE_KEY_JSON_PATH
59+
if not private_key_path:
60+
private_key_path = os.environ.get('GBQ_GOOGLE_APPLICATION_CREDENTIALS')
61+
return private_key_path
6162

6263

6364
def clean_gbq_environment(private_key=None):
@@ -123,11 +124,9 @@ def test_roundtrip(self):
123124
test_size = 20001
124125
df = make_mixed_dataframe_v2(test_size)
125126

126-
df.to_gbq(destination_table, _get_project_id(), chunksize=10000,
127+
df.to_gbq(destination_table, _get_project_id(), chunksize=None,
127128
private_key=_get_private_key_path())
128129

129-
sleep(30) # <- Curses Google!!!
130-
131130
result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}"
132131
.format(destination_table),
133132
project_id=_get_project_id(),

0 commit comments

Comments (0)