Skip to content

Commit

Permalink
Save spreadsheets in JSON format (#122)
Browse files Browse the repository at this point in the history
  • Loading branch information
istride authored Mar 13, 2024
1 parent 5b77023 commit a98cb34
Show file tree
Hide file tree
Showing 7 changed files with 329 additions and 44 deletions.
111 changes: 77 additions & 34 deletions src/rpft/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import argparse
import json

from rpft.converters import create_flows
from rpft import converters
from rpft.logger.logger import initialize_main_logger

LOGGER = initialize_main_logger()


def main():
    """CLI entry point: parse arguments and dispatch to the chosen subcommand.

    Each subcommand registers its handler via ``parser.set_defaults(func=...)``,
    so dispatch is simply calling ``args.func``.
    """
    args = create_parser().parse_args()
    args.func(args)


def create_flows(args):
flows = converters.create_flows(
args.input,
None,
args.format,
Expand All @@ -21,62 +25,101 @@ def main():
json.dump(flows, export, indent=4)


def convert_to_json(args):
    """Handler for the 'convert' subcommand.

    Converts the input spreadsheet(s) to a JSON string and writes it to the
    output path given on the command line.
    """
    serialized = converters.convert_to_json(args.input, args.format)

    with open(args.output, "w") as out_file:
        out_file.write(serialized)

def create_parser():
    """Build the top-level argument parser with its subcommands.

    NOTE(review): this span interleaved pre- and post-commit diff lines; the
    reconstruction below follows the added (post-commit) lines only.
    """
    parser = argparse.ArgumentParser(
        description=("create RapidPro flows JSON from spreadsheets"),
    )
    sub = parser.add_subparsers(
        help="run {subcommand} --help for further information",
        required=True,
        title="subcommands",
    )

    # Each helper registers one subcommand and its handler function.
    _add_create_command(sub)
    _add_convert_command(sub)

    return parser


def _add_create_command(sub):
    """Register the 'create' subcommand (flows JSON from spreadsheets).

    NOTE(review): this span interleaved pre- and post-commit diff lines; the
    reconstruction below follows the added (post-commit) lines only.
    """
    parser = sub.add_parser(
        "create",
        aliases=["create_flows"],
        help="create RapidPro flows from spreadsheets",
    )

    # Dispatched by main() via args.func.
    parser.set_defaults(func=create_flows)
    parser.add_argument(
        "--datamodels",
        help=(
            "name of the module defining user data models underlying the data sheets,"
            " e.g. if the model definitions reside in"
            " ./myfolder/mysubfolder/mymodelsfile.py, then this argument should be"
            " myfolder.mysubfolder.mymodelsfile"
        ),
    )
    parser.add_argument(
        "-f",
        "--format",
        choices=["csv", "google_sheets", "json", "xlsx"],
        help="input sheet format",
        required=True,
    )
    parser.add_argument(
        "-o",
        "--output",
        help="output JSON filename",
        required=True,
    )
    parser.add_argument(
        "--tags",
        help=(
            "tags to filter the content index sheet: a sequence of lists, with each "
            "list starting with an integer (tag position) followed by tags to include "
            "for this position, e.g. '1 foo bar 2 baz', means only include rows if "
            "tags:1 is empty, foo or bar, and tags:2 is empty or baz"
        ),
        nargs="*",
    )
    parser.add_argument(
        "input",
        help=(
            "paths to XLSX or JSON files, or directories containing CSV files, or"
            " Google Sheets IDs i.e. from the URL; inputs should be of the same format"
        ),
        nargs="+",
    )


def _add_convert_command(sub):
    """Register the 'convert' subcommand (spreadsheets to JSON workbook).

    NOTE(review): this span interleaved pre- and post-commit diff lines; the
    reconstruction below follows the added (post-commit) lines only.
    """
    parser = sub.add_parser("convert", help="save input spreadsheets as JSON")

    # Dispatched by main() via args.func.
    parser.set_defaults(func=convert_to_json)
    parser.add_argument(
        "-f",
        "--format",
        choices=["csv", "google_sheets", "json", "xlsx"],
        help="input sheet format",
        required=True,
    )
    parser.add_argument(
        "input",
        help=(
            "path to XLSX or JSON file, or directory containing CSV files, or Google"
            " Sheets ID i.e. from the URL"
        ),
    )
    parser.add_argument(
        "output",
        help=("path to output JSON file"),
    )


if __name__ == "__main__":
Expand Down
27 changes: 27 additions & 0 deletions src/rpft/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from rpft.parsers.creation.contentindexparser import ContentIndexParser
from rpft.parsers.creation.tagmatcher import TagMatcher
from rpft.parsers.sheets import (
AbstractSheetReader,
CSVSheetReader,
GoogleSheetReader,
JSONSheetReader,
XLSXSheetReader,
CompositeSheetReader,
)
Expand Down Expand Up @@ -39,11 +41,25 @@ def create_flows(input_files, output_file, sheet_format, data_models=None, tags=
return flows


def convert_to_json(input_file, sheet_format):
    """
    Convert source spreadsheet(s) into json.

    :param input_file: source spreadsheet to convert
    :param sheet_format: format of the input spreadsheet
    :returns: content of the input file converted to json.
    """
    reader = create_sheet_reader(sheet_format, input_file)

    return to_json(reader)


def create_sheet_reader(sheet_format, input_file):
if sheet_format == "csv":
sheet_reader = CSVSheetReader(input_file)
elif sheet_format == "xlsx":
sheet_reader = XLSXSheetReader(input_file)
elif sheet_format == "json":
sheet_reader = JSONSheetReader(input_file)
elif sheet_format == "google_sheets":
sheet_reader = GoogleSheetReader(input_file)
else:
Expand Down Expand Up @@ -73,6 +89,17 @@ def sheet_to_csv(path, sheet_id):
csv_file.write(sheet.table.export("csv"))


def to_json(reader: AbstractSheetReader) -> str:
    """Serialize every sheet of the reader into a single JSON workbook string.

    The result has a "meta" section (format version) and a "sheets" mapping of
    sheet name to the sheet's row dictionaries (tablib's ``dict`` export).
    """
    sheets = {}
    for name, sheet in reader.sheets.items():
        sheets[name] = sheet.table.dict

    book = {
        "meta": {
            "version": "0.1.0",
        },
        "sheets": sheets,
    }

    return json.dumps(book, ensure_ascii=False, indent=2)


def prepare_dir(path):
directory = Path(path)

Expand Down
33 changes: 27 additions & 6 deletions src/rpft/parsers/sheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
from abc import ABC
from pathlib import Path
from typing import List
from typing import List, Mapping

import tablib
from google.auth.transport.requests import Request
Expand All @@ -24,6 +24,10 @@ def __init__(self, reader, name, table):


class AbstractSheetReader(ABC):
@property
def sheets(self) -> Mapping[str, Sheet]:
    """Mapping of sheet name to Sheet, populated by the concrete reader."""
    return self._sheets

def get_sheet(self, name) -> Sheet:
    """Return the sheet with the given name, or None if there is no such sheet."""
    return self.sheets.get(name)

Expand All @@ -34,18 +38,29 @@ def get_sheets_by_name(self, name) -> List[Sheet]:
class CSVSheetReader(AbstractSheetReader):
    """Reads one Sheet per ``*.csv`` file found directly inside a directory."""

    def __init__(self, path):
        # The reader is identified by the directory it was loaded from.
        self.name = path
        # Sheet name is the CSV file name without its extension; stored in the
        # _sheets backing field exposed via AbstractSheetReader.sheets.
        self._sheets = {
            f.stem: Sheet(reader=self, name=f.stem, table=load_csv(f))
            for f in Path(path).glob("*.csv")
        }


class JSONSheetReader(AbstractSheetReader):
    """Reads sheets from a single JSON workbook file (as written by to_json)."""

    def __init__(self, filename):
        self.name = filename
        book = load_json(filename)
        self._sheets = {
            title: Sheet(reader=self, name=title, table=self._to_table(rows))
            for title, rows in book["sheets"].items()
        }

    @staticmethod
    def _to_table(rows):
        # Rebuild a tablib Dataset from the list of row dictionaries.
        table = tablib.Dataset()
        table.dict = rows
        return table


class XLSXSheetReader(AbstractSheetReader):
def __init__(self, filename):
self.name = filename
with open(filename, "rb") as table_data:
data = tablib.Databook().load(table_data.read(), "xlsx")
self.sheets = {}
self._sheets = {}
for sheet in data.sheets():
self.sheets[sheet.title] = Sheet(
reader=self,
Expand Down Expand Up @@ -98,16 +113,16 @@ def __init__(self, spreadsheet_id):
.execute()
)

self.sheets = {}
self._sheets = {}
for sheet in result.get("valueRanges", []):
name = sheet.get("range", "").split("!")[0]
if name.startswith("'") and name.endswith("'"):
name = name[1:-1]
content = sheet.get("values", [])
if name in self.sheets:
if name in self._sheets:
raise ValueError(f"Warning: Duplicate sheet name: {name}")
else:
self.sheets[name] = Sheet(
self._sheets[name] = Sheet(
reader=self,
name=name,
table=self._table_from_content(content),
Expand Down Expand Up @@ -182,5 +197,11 @@ def load_csv(path):
return tablib.import_set(csv, format="csv")


def load_json(path):
    """Parse the UTF-8 encoded JSON document at *path* and return its data."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def pad(row, n):
    """Return a new list: *row* extended with empty strings up to length *n*.

    Rows that are already at least *n* long come back as an unmodified copy.
    """
    shortfall = max(0, n - len(row))
    return row + [""] * shortfall
Loading

0 comments on commit a98cb34

Please sign in to comment.