Skip to content

Commit 944516d

Browse files
authored
Added parameters for batch_size and insertion_delay (#15)
Added parameters for batch_size and sleep + changed default batch_size
1 parent 1030640 commit 944516d

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

custom-recipes/googlesheets-append/recipe.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,24 @@
131131
"description": "",
132132
"type": "BOOLEAN",
133133
"defaultValue": false
134+
},
135+
{
136+
"name": "batch_size",
137+
"label": "Batch size",
138+
"description": "Number of rows inserted in the dataframe at once. The larger the value, the fewer the API calls and the faster the pipeline, but too large a value could lead to out-of-memory errors, especially if there is a high number of columns.",
139+
"type": "INT",
140+
"visibilityCondition": "model.show_advanced_parameters==true",
141+
"defaultValue": 200,
142+
"minI": 1
143+
},
144+
{
145+
"name": "insertion_delay",
146+
"label": "Insertion delay (in ms)",
147+
"description": "In milliseconds. Wait time between consecutive API calls to Google Sheets. If you are experiencing issues with API call limits, try increasing it to 10 or 20 ms, since there is a limit of 60 write requests per minute per user (https://developers.google.com/sheets/api/limits). Warning: it will slow down your pipeline.",
148+
"type": "INT",
149+
"visibilityCondition": "model.show_advanced_parameters==true",
150+
"defaultValue": 0,
151+
"minI": 0
134152
}
135153
],
136154

custom-recipes/googlesheets-append/recipe.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from gspread.utils import rowcol_to_a1
77
from safe_logger import SafeLogger
88
from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids
9+
from time import sleep
910
from googlesheets_append import append_rows
1011

1112

@@ -40,6 +41,8 @@
4041
write_mode = config.get("write_mode", "append")
4142
session = GoogleSheetsSession(credentials, credentials_type)
4243

44+
batch_size = config.get("batch_size", 200)
45+
insertion_delay = config.get("insertion_delay", 0)
4346

4447
# Load worksheet
4548
worksheet = session.get_spreadsheet(doc_id, tab_id)
@@ -80,7 +83,9 @@ def serializer_dss(obj):
8083
# write to spreadsheet by batch
8184
batch.append([serializer(v) for k, v in list(row.items())])
8285

83-
if len(batch) >= 50:
86+
if len(batch) >= batch_size:
87+
if insertion_delay > 0:
88+
# insertion_delay is configured in milliseconds, while time.sleep() expects
# seconds: divide by 1000 (float division, so it also holds on Python 2).
sleep(insertion_delay / 1000.0)
8489
worksheet.append_rows(batch, insert_format)
8590
batch = []
8691

0 commit comments

Comments
 (0)