diff --git a/notion/client.py b/notion/client.py
index 8b0ab18..32bd363 100644
--- a/notion/client.py
+++ b/notion/client.py
@@ -2,12 +2,16 @@
 import json
 import re
 import uuid
+import time
+import os
+import tempfile
 
-from requests import Session, HTTPError
+from requests import Session, HTTPError, get
 from requests.cookies import cookiejar_from_dict
 from urllib.parse import urljoin
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
+from zipfile import ZipFile
 
 from .block import Block, BLOCK_TYPES
 from .collection import (
@@ -168,6 +172,124 @@ def refresh_collection_rows(self, collection_id):
         row_ids = [row.id for row in self.get_collection(collection_id).get_rows()]
         self._store.set_collection_rows(collection_id, row_ids)
 
+    def _get_task_id(self, response):
+        """
+        Return the id of the task created by an 'enqueueTask' request.
+
+        Exporting is asynchronous: 'enqueueTask' answers with a 'taskId', and
+        the finished file is later looked up with that id through 'getTasks'.
+
+        response - the response returned by the 'enqueueTask' request.
+        """
+        return response.json()['taskId']
+
+    # Source from https://requests.readthedocs.io/en/master/user/quickstart/#raw-response-content
+    def _download_url(self, url, save_path, chunk_size=128):
+        """
+        Stream the file at url to disk.
+
+        url - string of the url from which to download.
+        save_path - string of the file name to write the download into.
+        chunk_size - number of bytes requested per chunk while streaming.
+
+        Raises requests.HTTPError when the server answers with an error status,
+        so an error page is never saved in place of the export.
+        """
+        response = get(url, stream=True)
+        try:
+            response.raise_for_status()
+            with open(save_path, 'wb') as fd:
+                for chunk in response.iter_content(chunk_size=chunk_size):
+                    fd.write(chunk)
+        finally:
+            # A streamed response holds its connection until it is closed.
+            response.close()
+
+    def _unzip_file(self, file, delete=True):
+        """
+        Extract every member of a zip file into the current working directory.
+
+        file - string of the zip file name.
+        delete - delete the zip file after extracting it or not.
+        """
+        with ZipFile(file) as archive:
+            archive.extractall()
+        if delete:
+            os.remove(file)
+
+    def download_block(
+        self,
+        block_id,
+        export_type,
+        event_name="exportBlock",
+        recursive=False,
+        time_zone="America/Chicago",
+        locale="en",
+        poll_interval=0.1,
+        timeout=None,
+    ):
+        """
+        Export a block and unpack the result into the current working directory.
+
+        block_id - id of the block. Should be a string.
+        export_type - type of the output file. The options are 'markdown', 'pdf', 'html'.
+        event_name - notion task to enqueue. Only 'exportBlock' has been observed so far.
+        recursive - include sub pages or not.
+        time_zone - sent as the 'timeZone' export option. The default is the value seen in
+            a request made from the US Central time zone; other values are unverified.
+        locale - sent as the 'locale' export option.
+        poll_interval - seconds to wait between two 'getTasks' requests.
+        timeout - maximum seconds to wait for the export; None (the default) waits forever.
+
+        Raises TimeoutError if timeout elapses before the export is ready.
+
+        TODO: If export_type is 'pdf' or 'html', exportOptions has another field called
+            'pdfFormat'. It should be set to "Letter". This needs to be implemented.
+        TODO: Add support for downloading a list of blocks.
+        TODO: Detect a failed export task; without a timeout this waits forever for an
+            'exportURL' that a failed task may never provide.
+        """
+        data = {
+            "task": {
+                "eventName": event_name,
+                "request": {
+                    "blockId": block_id,
+                    "recursive": recursive,
+                    "exportOptions": {
+                        "exportType": export_type,
+                        "timeZone": time_zone,
+                        "locale": locale,
+                    },
+                },
+            }
+        }
+
+        task_id = self._get_task_id(self.post("enqueueTask", data))
+        deadline = None if timeout is None else time.monotonic() + timeout
+
+        # Early responses may lack 'status', and 'status' may lack 'exportURL';
+        # keep asking until the export URL shows up.
+        while True:
+            result = self.post("getTasks", {"taskIds": [task_id]}).json()['results'][0]
+            status = result.get('status') or {}
+            if 'exportURL' in status:
+                break
+            if deadline is not None and time.monotonic() >= deadline:
+                raise TimeoutError(
+                    "Export of block {} was not ready after {} seconds".format(block_id, timeout)
+                )
+            time.sleep(poll_interval)
+
+        # A private temporary file avoids clobbering an existing 'tmp.zip' in the
+        # working directory and is removed even when the download or unzip fails.
+        handle, tmp_zip = tempfile.mkstemp(suffix=".zip")
+        os.close(handle)
+        try:
+            self._download_url(status['exportURL'], tmp_zip)
+            self._unzip_file(tmp_zip, delete=False)
+        finally:
+            os.remove(tmp_zip)
+
     def post(self, endpoint, data):
         """
         All API requests on Notion.so are done as POSTs (except the websocket communications).