Skip to content

Commit 7330463

Browse files
committed
ENH: Update to_gbq and read_gbq to pandas-gbq 0.5.0
Add link to Pandas-GBQ 0.5.0 in the what's new notes. Remove an unnecessary sleep in the GBQ tests. Closes googleapis/python-bigquery-pandas#177. Closes #21627.
1 parent 36422a8 commit 7330463

File tree

4 files changed: +92 −73 lines changed

Diff for: doc/source/whatsnew/v0.24.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ Other Enhancements
1919
- :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`)
2020
- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`)
2121
- Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`)
22+
- :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to
23+
reflect changes from the `Pandas-GBQ library version 0.5.0
24+
<https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-5-0>`__.
25+
(:issue:`21627`)
26+
2227

2328
.. _whatsnew_0240.api_breaking:
2429

Diff for: pandas/core/frame.py

+32-27
Original file line numberDiff line numberDiff line change
@@ -1102,37 +1102,27 @@ def to_dict(self, orient='dict', into=dict):
11021102
else:
11031103
raise ValueError("orient '{o}' not understood".format(o=orient))
11041104

1105-
def to_gbq(self, destination_table, project_id, chunksize=None,
1106-
verbose=None, reauth=False, if_exists='fail', private_key=None,
1107-
auth_local_webserver=False, table_schema=None):
1105+
def to_gbq(self, destination_table, project_id=None, chunksize=None,
1106+
reauth=False, if_exists='fail', private_key=None,
1107+
auth_local_webserver=False, table_schema=None, location=None,
1108+
progress_bar=True, verbose=None):
11081109
"""
11091110
Write a DataFrame to a Google BigQuery table.
11101111
11111112
This function requires the `pandas-gbq package
11121113
<https://pandas-gbq.readthedocs.io>`__.
11131114
1114-
Authentication to the Google BigQuery service is via OAuth 2.0.
1115-
1116-
- If ``private_key`` is provided, the library loads the JSON service
1117-
account credentials and uses those to authenticate.
1118-
1119-
- If no ``private_key`` is provided, the library tries `application
1120-
default credentials`_.
1121-
1122-
.. _application default credentials:
1123-
https://cloud.google.com/docs/authentication/production#providing_credentials_to_your_application
1124-
1125-
- If application default credentials are not found or cannot be used
1126-
with BigQuery, the library authenticates with user account
1127-
credentials. In this case, you will be asked to grant permissions
1128-
for product name 'pandas GBQ'.
1115+
See the `How to authenticate with Google BigQuery
1116+
<https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
1117+
guide for authentication instructions.
11291118
11301119
Parameters
11311120
----------
11321121
destination_table : str
1133-
Name of table to be written, in the form 'dataset.tablename'.
1134-
project_id : str
1135-
Google BigQuery Account project ID.
1122+
Name of table to be written, in the form ``dataset.tablename``.
1123+
project_id : str, optional
1124+
Google BigQuery Account project ID. Optional when available from
1125+
the environment.
11361126
chunksize : int, optional
11371127
Number of rows to be inserted in each chunk from the dataframe.
11381128
Set to ``None`` to load the whole dataframe at once.
@@ -1170,8 +1160,21 @@ def to_gbq(self, destination_table, project_id, chunksize=None,
11701160
BigQuery API documentation on available names of a field.
11711161
11721162
*New in version 0.3.1 of pandas-gbq*.
1173-
verbose : boolean, deprecated
1174-
*Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module
1163+
location : str, optional
1164+
Location where the load job should run. See the `BigQuery locations
1165+
documentation
1166+
<https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
1167+
list of available locations. The location must match that of the
1168+
target dataset.
1169+
1170+
*New in version 0.5.0 of pandas-gbq*.
1171+
progress_bar : bool, default True
1172+
Use the library `tqdm` to show the progress bar for the upload,
1173+
chunk by chunk.
1174+
1175+
*New in version 0.5.0 of pandas-gbq*.
1176+
verbose : bool, deprecated
1177+
Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
11751178
to adjust verbosity instead
11761179
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
11771180
@@ -1182,10 +1185,12 @@ def to_gbq(self, destination_table, project_id, chunksize=None,
11821185
"""
11831186
from pandas.io import gbq
11841187
return gbq.to_gbq(
1185-
self, destination_table, project_id, chunksize=chunksize,
1186-
verbose=verbose, reauth=reauth, if_exists=if_exists,
1187-
private_key=private_key, auth_local_webserver=auth_local_webserver,
1188-
table_schema=table_schema)
1188+
self, destination_table, project_id=project_id,
1189+
chunksize=chunksize, reauth=reauth,
1190+
if_exists=if_exists, private_key=private_key,
1191+
auth_local_webserver=auth_local_webserver,
1192+
table_schema=table_schema, location=location,
1193+
progress_bar=progress_bar, verbose=verbose)
11891194

11901195
@classmethod
11911196
def from_records(cls, data, index=None, exclude=None, columns=None,

Diff for: pandas/io/gbq.py

+48-38
Original file line numberDiff line numberDiff line change
@@ -22,34 +22,26 @@ def _try_import():
2222

2323

2424
def read_gbq(query, project_id=None, index_col=None, col_order=None,
25-
reauth=False, verbose=None, private_key=None, dialect='legacy',
26-
**kwargs):
25+
reauth=False, private_key=None, auth_local_webserver=False,
26+
dialect='legacy', location=None, configuration=None,
27+
verbose=None):
2728
"""
2829
Load data from Google BigQuery.
2930
3031
This function requires the `pandas-gbq package
3132
<https://pandas-gbq.readthedocs.io>`__.
3233
33-
Authentication to the Google BigQuery service is via OAuth 2.0.
34-
35-
- If "private_key" is not provided:
36-
37-
By default "application default credentials" are used.
38-
39-
If default application credentials are not found or are restrictive,
40-
user account credentials are used. In this case, you will be asked to
41-
grant permissions for product name 'pandas GBQ'.
42-
43-
- If "private_key" is provided:
44-
45-
Service account credentials will be used to authenticate.
34+
See the `How to authenticate with Google BigQuery
35+
<https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
36+
guide for authentication instructions.
4637
4738
Parameters
4839
----------
4940
query : str
5041
SQL-Like Query to return data values.
51-
project_id : str
52-
Google BigQuery Account project ID.
42+
project_id : str, optional
43+
Google BigQuery Account project ID. Optional when available from
44+
the environment.
5345
index_col : str, optional
5446
Name of result column to use for index in results DataFrame.
5547
col_order : list(str), optional
@@ -62,6 +54,16 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
6254
Service account private key in JSON format. Can be file path
6355
or string contents. This is useful for remote server
6456
authentication (eg. Jupyter/IPython notebook on remote host).
57+
auth_local_webserver : boolean, default False
58+
Use the `local webserver flow`_ instead of the `console flow`_
59+
when getting user credentials.
60+
61+
.. _local webserver flow:
62+
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
63+
.. _console flow:
64+
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
65+
66+
*New in version 0.2.0 of pandas-gbq*.
6567
dialect : str, default 'legacy'
6668
SQL syntax dialect to use. Value can be one of:
6769
@@ -74,19 +76,26 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
7476
compliant with the SQL 2011 standard. For more information
7577
see `BigQuery Standard SQL Reference
7678
<https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
77-
verbose : boolean, deprecated
78-
*Deprecated in Pandas-GBQ 0.4.0.* Use the `logging module
79-
to adjust verbosity instead
80-
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
81-
kwargs : dict
82-
Arbitrary keyword arguments.
83-
configuration (dict): query config parameters for job processing.
79+
location : str, optional
80+
Location where the query job should run. See the `BigQuery locations
81+
documentation
82+
<https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
83+
list of available locations. The location must match that of any
84+
datasets used in the query.
85+
86+
*New in version 0.5.0 of pandas-gbq*.
87+
configuration : dict, optional
88+
Query config parameters for job processing.
8489
For example:
8590
8691
configuration = {'query': {'useQueryCache': False}}
8792
88-
For more information see `BigQuery SQL Reference
89-
<https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__
93+
For more information see `BigQuery REST API Reference
94+
<https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
95+
verbose : None, deprecated
96+
Deprecated in Pandas-GBQ 0.4.0. Use the `logging module
97+
to adjust verbosity instead
98+
<https://pandas-gbq.readthedocs.io/en/latest/intro.html#logging>`__.
9099
91100
Returns
92101
-------
@@ -100,20 +109,21 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
100109
"""
101110
pandas_gbq = _try_import()
102111
return pandas_gbq.read_gbq(
103-
query, project_id=project_id,
104-
index_col=index_col, col_order=col_order,
105-
reauth=reauth, verbose=verbose,
106-
private_key=private_key,
107-
dialect=dialect,
108-
**kwargs)
112+
query, project_id=project_id, index_col=index_col,
113+
col_order=col_order, reauth=reauth, verbose=verbose,
114+
private_key=private_key, auth_local_webserver=auth_local_webserver,
115+
dialect=dialect, location=location, configuration=configuration)
109116

110117

111-
def to_gbq(dataframe, destination_table, project_id, chunksize=None,
118+
def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
112119
verbose=None, reauth=False, if_exists='fail', private_key=None,
113-
auth_local_webserver=False, table_schema=None):
120+
auth_local_webserver=False, table_schema=None, location=None,
121+
progress_bar=True):
114122
pandas_gbq = _try_import()
115123
return pandas_gbq.to_gbq(
116-
dataframe, destination_table, project_id, chunksize=chunksize,
117-
verbose=verbose, reauth=reauth, if_exists=if_exists,
118-
private_key=private_key, auth_local_webserver=auth_local_webserver,
119-
table_schema=table_schema)
124+
dataframe, destination_table, project_id=project_id,
125+
chunksize=chunksize, verbose=verbose, reauth=reauth,
126+
if_exists=if_exists, private_key=private_key,
127+
auth_local_webserver=auth_local_webserver,
128+
table_schema=table_schema, location=location,
129+
progress_bar=progress_bar)

Diff for: pandas/tests/io/test_gbq.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from datetime import datetime
33
import pytz
44
import platform
5-
from time import sleep
65
import os
76

87
import numpy as np
@@ -48,16 +47,18 @@ def _in_travis_environment():
4847
def _get_project_id():
4948
if _in_travis_environment():
5049
return os.environ.get('GBQ_PROJECT_ID')
51-
else:
52-
return PROJECT_ID
50+
return PROJECT_ID or os.environ.get('GBQ_PROJECT_ID')
5351

5452

5553
def _get_private_key_path():
5654
if _in_travis_environment():
5755
return os.path.join(*[os.environ.get('TRAVIS_BUILD_DIR'), 'ci',
5856
'travis_gbq.json'])
59-
else:
60-
return PRIVATE_KEY_JSON_PATH
57+
58+
private_key_path = PRIVATE_KEY_JSON_PATH
59+
if not private_key_path:
60+
private_key_path = os.environ.get('GBQ_GOOGLE_APPLICATION_CREDENTIALS')
61+
return private_key_path
6162

6263

6364
def clean_gbq_environment(private_key=None):
@@ -123,11 +124,9 @@ def test_roundtrip(self):
123124
test_size = 20001
124125
df = make_mixed_dataframe_v2(test_size)
125126

126-
df.to_gbq(destination_table, _get_project_id(), chunksize=10000,
127+
df.to_gbq(destination_table, _get_project_id(), chunksize=None,
127128
private_key=_get_private_key_path())
128129

129-
sleep(30) # <- Curses Google!!!
130-
131130
result = pd.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}"
132131
.format(destination_table),
133132
project_id=_get_project_id(),

0 commit comments

Comments (0)