diff --git a/custom-recipes/googlesheets-append/recipe.json b/custom-recipes/googlesheets-append/recipe.json
index 7610e6e..83197a8 100644
--- a/custom-recipes/googlesheets-append/recipe.json
+++ b/custom-recipes/googlesheets-append/recipe.json
@@ -131,6 +131,24 @@
             "description": "",
             "type": "BOOLEAN",
             "defaultValue": false
+        },
+        {
+            "name": "batch_size",
+            "label": "Batch size",
+            "description": "Number of rows inserted in the dataframe at once. The bigger, the less API calls and the quickest pipeline, but putting too big a value could lead to out of memory errors, especially if there is a high number of columns.",
+            "type": "INT",
+            "visibilityCondition": "model.show_advanced_parameters==true",
+            "defaultValue": 200,
+            "minI": 1
+        },
+        {
+            "name": "insertion_delay",
+            "label": "Insertion delay (in ms)",
+            "description": "In milliseconds. Wait time between each API call to Google Sheets. If you are experiencing issues with API call limits, try increasing it to 10 or 20 ms, since there is a 60 write-per-minute limit on users (https://developers.google.com/sheets/api/limits).\nWarning: it will slow down your pipeline.",
+            "type": "INT",
+            "visibilityCondition": "model.show_advanced_parameters==true",
+            "defaultValue": 0,
+            "minI": 0
         }
     ],
diff --git a/custom-recipes/googlesheets-append/recipe.py b/custom-recipes/googlesheets-append/recipe.py
index b072c09..59dc4ab 100644
--- a/custom-recipes/googlesheets-append/recipe.py
+++ b/custom-recipes/googlesheets-append/recipe.py
@@ -6,6 +6,7 @@
 from gspread.utils import rowcol_to_a1
 from safe_logger import SafeLogger
 from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids
+from time import sleep
 from googlesheets_append import append_rows
 
@@ -40,6 +41,8 @@
 write_mode = config.get("write_mode", "append")
 
 session = GoogleSheetsSession(credentials, credentials_type)
+batch_size = config.get("batch_size", 200)
+insertion_delay = config.get("insertion_delay", 0)
 
 # Load worksheet
 worksheet = session.get_spreadsheet(doc_id, tab_id)
@@ -80,7 +83,9 @@ def serializer_dss(obj):
 
         # write to spreadsheet by batch
         batch.append([serializer(v) for k, v in list(row.items())])
-        if len(batch) >= 50:
+        if len(batch) >= batch_size:
+            if insertion_delay > 0:
+                sleep(insertion_delay / 1000.0)  # parameter is in milliseconds
             worksheet.append_rows(batch, insert_format)
             batch = []