Skip to content

Commit 7c3dbaf

Browse files
cbandy authored and tswast committed
Allow newlines in data passed to to_gbq() (#230)
* Allow newlines in data passed to to_gbq()
* Add version header to changelog
1 parent 5d0346a commit 7c3dbaf

File tree

4 files changed

+49
-0
lines changed

4 files changed

+49
-0
lines changed

docs/source/changelog.rst

+7
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
11
Changelog
22
=========
33

4+
.. _changelog-0.7.1:
5+
6+
0.7.1 / unreleased
7+
--------------------
8+
9+
- Allow newlines in data passed to ``to_gbq``. (:issue:`180`)
10+
411
.. _changelog-0.7.0:
512

613
0.7.0 / 2018-10-19

pandas_gbq/load.py

+1
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ def load_chunks(
6161
job_config = bigquery.LoadJobConfig()
6262
job_config.write_disposition = "WRITE_APPEND"
6363
job_config.source_format = "CSV"
64+
job_config.allow_quoted_newlines = True
6465

6566
if schema is None:
6667
schema = pandas_gbq.schema.generate_bq_schema(dataframe)

tests/system/test_gbq.py

+29
Original file line number | Diff line number | Diff line change
@@ -1167,6 +1167,35 @@ def test_upload_mixed_float_and_int(self, project_id):
11671167

11681168
assert len(result_df) == test_size
11691169

1170+
def test_upload_data_with_newlines(self, project_id):
1171+
test_id = "data_with_newlines"
1172+
test_size = 2
1173+
df = DataFrame({"s": ["abcd", "ef\ngh"]})
1174+
1175+
gbq.to_gbq(
1176+
df,
1177+
self.destination_table + test_id,
1178+
project_id=project_id,
1179+
private_key=self.credentials,
1180+
)
1181+
1182+
result_df = gbq.read_gbq(
1183+
"SELECT * FROM {0}".format(self.destination_table + test_id),
1184+
project_id=project_id,
1185+
private_key=self.credentials,
1186+
dialect="legacy",
1187+
)
1188+
1189+
assert len(result_df) == test_size
1190+
1191+
if sys.version_info.major < 3:
1192+
pytest.skip(msg="Unicode comparison in Py2 not working")
1193+
1194+
result = result_df["s"].sort_values()
1195+
expected = df["s"].sort_values()
1196+
1197+
tm.assert_numpy_array_equal(expected.values, result.values)
1198+
11701199
def test_upload_data_flexible_column_order(self, project_id):
11711200
test_id = "13"
11721201
test_size = 10

tests/unit/test_load.py

+12
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,18 @@ def test_encode_chunk_with_floats():
3737
assert "1.05153" in csv_string
3838

3939

40+
def test_encode_chunk_with_newlines():
41+
"""See: https://github.com/pydata/pandas-gbq/issues/180
42+
"""
43+
df = pandas.DataFrame({"s": ["abcd", "ef\ngh", "ij\r\nkl"]})
44+
csv_buffer = load.encode_chunk(df)
45+
csv_bytes = csv_buffer.read()
46+
csv_string = csv_bytes.decode("utf-8")
47+
assert "abcd" in csv_string
48+
assert '"ef\ngh"' in csv_string
49+
assert '"ij\r\nkl"' in csv_string
50+
51+
4052
def test_encode_chunks_splits_dataframe():
4153
df = pandas.DataFrame(numpy.random.randn(6, 4), index=range(6))
4254
chunks = list(load.encode_chunks(df, chunksize=2))

0 commit comments

Comments
 (0)