From a98cb34bdaad3c495613b703c028072d62cbcfcf Mon Sep 17 00:00:00 2001 From: Ian Stride Date: Wed, 13 Mar 2024 23:33:25 +0000 Subject: [PATCH] Save spreadsheets in JSON format (#122) --- src/rpft/cli.py | 111 ++++++++++++++------- src/rpft/converters.py | 27 +++++ src/rpft/parsers/sheets.py | 33 ++++-- tests/input/example1/content_index.json | 127 ++++++++++++++++++++++++ tests/mocks.py | 6 +- tests/test_converters.py | 60 +++++++++++ tests/test_sheet_reader.py | 9 +- 7 files changed, 329 insertions(+), 44 deletions(-) create mode 100644 tests/input/example1/content_index.json create mode 100644 tests/test_converters.py diff --git a/src/rpft/cli.py b/src/rpft/cli.py index 66fffef..9a53d70 100644 --- a/src/rpft/cli.py +++ b/src/rpft/cli.py @@ -1,7 +1,7 @@ import argparse import json -from rpft.converters import create_flows +from rpft import converters from rpft.logger.logger import initialize_main_logger LOGGER = initialize_main_logger() @@ -9,7 +9,11 @@ def main(): args = create_parser().parse_args() - flows = create_flows( + args.func(args) + + +def create_flows(args): + flows = converters.create_flows( args.input, None, args.format, @@ -21,62 +25,101 @@ def main(): json.dump(flows, export, indent=4) +def convert_to_json(args): + content = converters.convert_to_json(args.input, args.format) + + with open(args.output, "w") as export: + export.write(content) + + def create_parser(): parser = argparse.ArgumentParser( - description=( - "Generate RapidPro flows JSON from spreadsheets\n" - "\n" - "Example usage:\n" - "create_flows --output=flows.json --format=csv --datamodels=example.models" - " sheet1.csv sheet2.csv" + description=("create RapidPro flows JSON from spreadsheets"), + ) + sub = parser.add_subparsers( + help="run {subcommand} --help for further information", + required=True, + title="subcommands", + ) + + _add_create_command(sub) + _add_convert_command(sub) + + return parser + + +def _add_create_command(sub): + parser = sub.add_parser( + "create", + aliases=["create_flows"], + help="create RapidPro flows from spreadsheets", + ) + + parser.set_defaults(func=create_flows) + parser.add_argument( + "--datamodels", + help=( + "name of the module defining user data models underlying the data sheets," + " e.g. if the model definitions reside in" + " ./myfolder/mysubfolder/mymodelsfile.py, then this argument should be" + " myfolder.mysubfolder.mymodelsfile" ), - formatter_class=argparse.RawTextHelpFormatter, ) parser.add_argument( - "command", - choices=["create_flows"], + "-f", + "--format", + choices=["csv", "google_sheets", "json", "xlsx"], + help="input sheet format", + required=True, + ) + parser.add_argument( + "-o", + "--output", + help="output JSON filename", + required=True, + ) + parser.add_argument( + "--tags", help=( - "create_flows: create flows from spreadsheets\n" - "flow_to_sheet: create spreadsheets from flows (not implemented)" + "tags to filter the content index sheet: a sequence of lists, with each " + "list starting with an integer (tag position) followed by tags to include " + "for this position, e.g. '1 foo bar 2 baz', means only include rows if " + "tags:1 is empty, foo or bar, and tags:2 is empty or baz" ), + nargs="*", ) parser.add_argument( "input", - nargs="+", help=( - "CSV/XLSX: path to files on local file system\n" - "Google Sheets: sheet ID i.e." - " https://docs.google.com/spreadsheets/d/[ID]/edit" + "paths to XLSX or JSON files, or directories containing CSV files, or" + " Google Sheets IDs i.e. from the URL; inputs should be of the same format" ), + nargs="+", ) - parser.add_argument("-o", "--output", required=True, help="Output JSON filename") + + +def _add_convert_command(sub): + parser = sub.add_parser("convert", help="save input spreadsheets as JSON") + + parser.set_defaults(func=convert_to_json) parser.add_argument( "-f", "--format", + choices=["csv", "google_sheets", "json", "xlsx"], + help="input sheet format", required=True, - choices=["csv", "google_sheets", "xlsx"], - help="Input sheet format", ) parser.add_argument( - "--datamodels", + "input", help=( - "Module name of the module defining user data models, i.e. models " - "underlying the data sheets. E.g. if the model definitions reside in " - "./myfolder/mysubfolder/mymodelsfile.py, then this argument should be " - "myfolder.mysubfolder.mymodelsfile" + "path to XLSX or JSON file, or directory containing CSV files, or Google" + " Sheets ID i.e. from the URL" ), ) parser.add_argument( - "--tags", - nargs="*", - help=( - "Tags to filter the content index sheet. A sequence of lists, with each " - "list starting with an integer (tag position) followed by tags to include " - "for this position. Example: 1 foo bar 2 baz means: only include rows if " - "tags:1 is empty, foo or bar, and tags:2 is empty or baz" - ), + "output", + help=("path to output JSON file"), ) - return parser if __name__ == "__main__": diff --git a/src/rpft/converters.py b/src/rpft/converters.py index 268b68f..dd7131f 100644 --- a/src/rpft/converters.py +++ b/src/rpft/converters.py @@ -5,8 +5,10 @@ from rpft.parsers.creation.contentindexparser import ContentIndexParser from rpft.parsers.creation.tagmatcher import TagMatcher from rpft.parsers.sheets import ( + AbstractSheetReader, CSVSheetReader, GoogleSheetReader, + JSONSheetReader, XLSXSheetReader, CompositeSheetReader, ) @@ -39,11 +41,25 @@ def create_flows(input_files, output_file, sheet_format, data_models=None, tags= return flows +def convert_to_json(input_file, sheet_format): + """ + Convert source spreadsheet(s) into json. + + :param input_file: source spreadsheet to convert + :param sheet_format: format of the input spreadsheet + :returns: content of the input file converted to json. + """ + + return to_json(create_sheet_reader(sheet_format, input_file)) + + def create_sheet_reader(sheet_format, input_file): if sheet_format == "csv": sheet_reader = CSVSheetReader(input_file) elif sheet_format == "xlsx": sheet_reader = XLSXSheetReader(input_file) + elif sheet_format == "json": + sheet_reader = JSONSheetReader(input_file) elif sheet_format == "google_sheets": sheet_reader = GoogleSheetReader(input_file) else: @@ -73,6 +89,17 @@ def sheet_to_csv(path, sheet_id): csv_file.write(sheet.table.export("csv")) +def to_json(reader: AbstractSheetReader) -> str: + book = { + "meta": { + "version": "0.1.0", + }, + "sheets": {name: sheet.table.dict for name, sheet in reader.sheets.items()}, + } + + return json.dumps(book, ensure_ascii=False, indent=2) + + def prepare_dir(path): directory = Path(path) diff --git a/src/rpft/parsers/sheets.py b/src/rpft/parsers/sheets.py index 4e6960b..4b78106 100644 --- a/src/rpft/parsers/sheets.py +++ b/src/rpft/parsers/sheets.py @@ -2,7 +2,7 @@ import os from abc import ABC from pathlib import Path -from typing import List +from typing import List, Mapping import tablib from google.auth.transport.requests import Request @@ -24,6 +24,10 @@ def __init__(self, reader, name, table): class AbstractSheetReader(ABC): + @property + def sheets(self) -> Mapping[str, Sheet]: + return self._sheets + def get_sheet(self, name) -> Sheet: return self.sheets.get(name) @@ -34,18 +38,29 @@ def get_sheets_by_name(self, name) -> List[Sheet]: class CSVSheetReader(AbstractSheetReader): def __init__(self, path): self.name = path - self.sheets = { + self._sheets = { f.stem: Sheet(reader=self, name=f.stem, table=load_csv(f)) for f in Path(path).glob("*.csv") } +class JSONSheetReader(AbstractSheetReader): + def __init__(self, filename): + self.name = filename + data = load_json(filename) + self._sheets = {} + for name, content in data["sheets"].items(): + table = tablib.Dataset() + table.dict = content + self._sheets[name] = Sheet(reader=self, name=name, table=table) + + class XLSXSheetReader(AbstractSheetReader): def __init__(self, filename): self.name = filename with open(filename, "rb") as table_data: data = tablib.Databook().load(table_data.read(), "xlsx") - self.sheets = {} + self._sheets = {} for sheet in data.sheets(): self.sheets[sheet.title] = Sheet( reader=self, @@ -98,16 +113,16 @@ def __init__(self, spreadsheet_id): .execute() ) - self.sheets = {} + self._sheets = {} for sheet in result.get("valueRanges", []): name = sheet.get("range", "").split("!")[0] if name.startswith("'") and name.endswith("'"): name = name[1:-1] content = sheet.get("values", []) - if name in self.sheets: + if name in self._sheets: raise ValueError(f"Warning: Duplicate sheet name: {name}") else: - self.sheets[name] = Sheet( + self._sheets[name] = Sheet( reader=self, name=name, table=self._table_from_content(content), @@ -182,5 +197,11 @@ def load_csv(path): return tablib.import_set(csv, format="csv") +def load_json(path): + with open(path, mode="r", encoding="utf-8") as fjson: + data = json.load(fjson) + return data + + def pad(row, n): return row + ([""] * (n - len(row))) diff --git a/tests/input/example1/content_index.json b/tests/input/example1/content_index.json new file mode 100644 index 0000000..ed2ce4b --- /dev/null +++ b/tests/input/example1/content_index.json @@ -0,0 +1,127 @@ +{ + "meta": { + "version": "0.1.0" + }, + "sheets": { + "content_index": [ + { + "type": "create_flow", + "sheet_name": "my_template", + "data_sheet": "nesteddata", + "data_row_id": "row1", + "new_name": "", + "data_model": "", + "status": "", + "group": "", + "tags.1": "advanced", + "tags.2": "type1" + }, + { + "type": "create_flow", + "sheet_name": "my_template", + "data_sheet": "nesteddata", + "data_row_id": "row2", + "new_name": "", + "data_model": "", + "status": "", + "group": "", + "tags.1": "advanced", + "tags.2": "type2" + }, + { + "type": "create_flow", + "sheet_name": "my_basic_flow", + "data_sheet": "", + "data_row_id": "", + "new_name": "", + "data_model": "", + "status": "", + "group": "", + "tags.1": "basic", + "tags.2": "" + }, + { + "type": "data_sheet", + "sheet_name": "nesteddata", + "data_sheet": "", + "data_row_id": "", + "new_name": "", + "data_model": "NestedRowModel", + "status": "", + "group": "", + "tags.1": "", + "tags.2": "" + }, + { + "type": "create_campaign", + "sheet_name": "my_campaign", + "data_sheet": "", + "data_row_id": "", + "new_name": "", + "data_model": "", + "status": "", + "group": "My Group", + "tags.1": "basic", + "tags.2": "" + } + ], + "my_basic_flow": [ + { + "row_id": "", + "type": "send_message", + "from": "start", + "message_text": "Some text" + } + ], + "my_campaign": [ + { + "offset": "15", + "unit": "H", + "event_type": "F", + "delivery_hour": "", + "message": "", + "relative_to": "Last Seen On", + "start_mode": "I", + "flow": "my_basic_flow" + }, + { + "offset": "120", + "unit": "D", + "event_type": "M", + "delivery_hour": "12", + "message": "Hello!", + "relative_to": "Created On", + "start_mode": "S", + "flow": "" + } + ], + "my_template": [ + { + "row_id": "", + "type": "send_message", + "from": "start", + "message_text": "{{value1}}" + }, + { + "row_id": "", + "type": "send_message", + "from": "", + "message_text": "{{custom_field.happy}} and {{custom_field.sad}}" + } + ], + "nesteddata": [ + { + "ID": "row1", + "value1": "Value1", + "custom_field.happy": "Happy1", + "custom_field.sad": "Sad1" + }, + { + "ID": "row2", + "value1": "Value2", + "custom_field.happy": "Happy2", + "custom_field.sad": "Sad2" + } + ] + } +} \ No newline at end of file diff --git a/tests/mocks.py b/tests/mocks.py index 1a4a32d..1bd629a 100644 --- a/tests/mocks.py +++ b/tests/mocks.py @@ -52,17 +52,17 @@ def parse_next_row(self, omit_templating=False, return_index=False): class MockSheetReader(AbstractSheetReader): def __init__(self, main_sheet_data=None, sheet_data_dict={}, name="mock"): self.name = name - self.sheets = {} + self._sheets = {} if main_sheet_data: - self.sheets["content_index"] = Sheet( + self._sheets["content_index"] = Sheet( reader=self, name="content_index", table=tablib.import_set(main_sheet_data, format="csv"), ) for name, content in sheet_data_dict.items(): - self.sheets[name] = Sheet( + self._sheets[name] = Sheet( reader=self, name=name, table=tablib.import_set(content, format="csv"), diff --git a/tests/test_converters.py b/tests/test_converters.py new file mode 100644 index 0000000..44527c9 --- /dev/null +++ b/tests/test_converters.py @@ -0,0 +1,60 @@ +import json +from unittest import TestCase + +from tablib import Dataset + +from rpft.converters import to_json +from rpft.parsers.sheets import AbstractSheetReader, Sheet + + +class TestReaderToJson(TestCase): + def test_something(self): + reader = MockSheetReader( + { + "sheet1": Sheet( + reader=None, + name="sheet1", + table=Dataset( + ("row1_col1", "row1_col2", "row1_col3"), + headers=["col1", "col2", "col3"], + ), + ), + "sheet2": Sheet( + reader=None, + name="sheet2", + table=Dataset( + ("row1_col1", "row1_col2", "row1_col3"), + headers=["col1", "col2", "col3"], + ), + ), + } + ) + self.assertDictEqual( + json.loads(to_json(reader)), + { + "meta": { + "version": "0.1.0", + }, + "sheets": { + "sheet1": [ + { + "col1": "row1_col1", + "col2": "row1_col2", + "col3": "row1_col3", + }, + ], + "sheet2": [ + { + "col1": "row1_col1", + "col2": "row1_col2", + "col3": "row1_col3", + }, + ], + }, + }, + ) + + +class MockSheetReader(AbstractSheetReader): + def __init__(self, sheets): + self._sheets = sheets diff --git a/tests/test_sheet_reader.py b/tests/test_sheet_reader.py index 86d7278..d322b96 100644 --- a/tests/test_sheet_reader.py +++ b/tests/test_sheet_reader.py @@ -1,6 +1,6 @@ from unittest import TestCase -from rpft.parsers.sheets import CSVSheetReader, Sheet, XLSXSheetReader +from rpft.parsers.sheets import CSVSheetReader, Sheet, XLSXSheetReader, JSONSheetReader from tests import TESTS_ROOT @@ -35,3 +35,10 @@ def setUp(self): filename = str(TESTS_ROOT / "input/example1/content_index.xlsx") self.reader = XLSXSheetReader(filename=filename) self.expected_reader_name = filename + + +class TestJsonSheetReader(Base.SheetReaderTestCase): + def setUp(self): + filename = str(TESTS_ROOT / "input/example1/content_index.json") + self.reader = JSONSheetReader(filename=filename) + self.expected_reader_name = filename