diff --git a/docs/source/APIs/data_preparation.rst b/docs/source/APIs/data_preparation.rst deleted file mode 100644 index 0fb734973..000000000 --- a/docs/source/APIs/data_preparation.rst +++ /dev/null @@ -1,15 +0,0 @@ -Data Preparation -============ - -These APIs are used for writing data preparation jobs and loading data into Sycamore. For more information, visit the :doc:`Sycamore data preparation jobs concepts<../data_ingestion_and_preparation/data_preparation_concepts.md>` page in the documentation. - -.. toctree:: - :maxdepth: 2 - - /APIs/data_preparation/docset.rst - /APIs/data_preparation/context.rst - /APIs/data_preparation/node.rst - /APIs/data_preparation/docsetreader.rst - /APIs/data_preparation/docsetwriter.rst - /APIs/data_preparation/document.rst - /APIs/data_preparation/functions.rst diff --git a/docs/source/APIs/transforms.rst b/docs/source/APIs/transforms.rst deleted file mode 100644 index 263a1e5c3..000000000 --- a/docs/source/APIs/transforms.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. _Ref-Transforms: - -Low-Level Transforms (for Sycamore development) -=========== - -.. note:: - Most users of Sycamore won't need to interact with these classes. The transform classes are primarily of interest to - developers looking to extend Sycamore or contribute to the project. - -.. 
toctree:: - :maxdepth: 2 - - /APIs/transforms/augment_text.rst - /APIs/transforms/basics.rst - /APIs/transforms/bbox_merge.rst - /APIs/transforms/embed.rst - /APIs/transforms/explode.rst - /APIs/transforms/extract_entity.rst - /APIs/transforms/extract_schema.rst - /APIs/transforms/extract_table.rst - /APIs/transforms/map.rst - /APIs/transforms/mark_misc.rst - /APIs/transforms/merge_elements.rst - /APIs/transforms/partition.rst - /APIs/transforms/query.rst - /APIs/transforms/random_sample.rst - /APIs/transforms/regex_replace.rst - /APIs/transforms/sketcher.rst - /APIs/transforms/split_elements.rst - /APIs/transforms/spread_properties.rst - /APIs/transforms/summarize.rst - /APIs/transforms/summarize_images.rst diff --git a/docs/source/aryn_cloud/APIs.rst b/docs/source/aryn_cloud/APIs.rst new file mode 100644 index 000000000..077a0ae7d --- /dev/null +++ b/docs/source/aryn_cloud/APIs.rst @@ -0,0 +1,10 @@ +Aryn Partitioning Service APIs +============================== + +This is the API reference for the Aryn-SDK, which is used to interact with the Aryn Partitioning Service. + +.. toctree:: + :maxdepth: 1 + + ./APIs/aryn-sdk.rst + diff --git a/docs/source/APIs/aryn-sdk.rst b/docs/source/aryn_cloud/APIs/aryn-sdk.rst similarity index 80% rename from docs/source/APIs/aryn-sdk.rst rename to docs/source/aryn_cloud/APIs/aryn-sdk.rst index dc8dc86d8..6a9aba1d6 100644 --- a/docs/source/APIs/aryn-sdk.rst +++ b/docs/source/aryn_cloud/APIs/aryn-sdk.rst @@ -6,4 +6,4 @@ Aryn SDK .. 
toctree:: :maxdepth: 2 - /APIs/aryn-sdk/partition.rst + ./aryn-sdk/partition.rst diff --git a/docs/source/APIs/aryn-sdk/partition.rst b/docs/source/aryn_cloud/APIs/aryn-sdk/partition.rst similarity index 100% rename from docs/source/APIs/aryn-sdk/partition.rst rename to docs/source/aryn_cloud/APIs/aryn-sdk/partition.rst diff --git a/docs/source/APIs/gen b/docs/source/aryn_cloud/APIs/gen similarity index 100% rename from docs/source/APIs/gen rename to docs/source/aryn_cloud/APIs/gen diff --git a/docs/source/index.rst b/docs/source/index.rst index 2e5ab8758..6facac02a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ Sycamore is a document processing engine covered under the Apache v2.0 license. Sycamore uses LLM-powered transforms, and you can choose the model to leverage. It can handle complex documents with embedded tables, figures, graphs, and other infographics. For ETL use cases, Sycamore reliably generates vector embeddings with the model of your choice, and loads vector databases and search engines like Pinecone, OpenSearch, Weaviate, Elasticsearch, and more. -.. image:: images/ArynArchitecture_APS%2BSycamorev2.png +.. image:: images/ArynArchitecture_APS+Sycamorev2.png **Key Features** @@ -57,7 +57,7 @@ Next, you can: .. -You can specify additional options (e.g. table extraction), and a list of these options is :doc:`here `_ +You can specify additional options (e.g. table extraction), and a list of these options is :doc: `here ` | @@ -91,6 +91,7 @@ More Resources :hidden: /aryn_cloud/aryn_partitioning_service.rst + /aryn_cloud/APIs.rst .. toctree:: :caption: Sycamore @@ -103,14 +104,4 @@ More Resources /sycamore/transforms.rst /sycamore/connectors.rst /sycamore/tutorials.rst - - -.. 
toctree:: - :caption: APIs - :maxdepth: 2 - :hidden: - - /APIs/data_preparation.rst - /APIs/conversation_memory.rst - /APIs/transforms.rst - /APIs/aryn-sdk.rst + /sycamore/APIs.rst diff --git a/docs/source/sycamore/APIs.rst b/docs/source/sycamore/APIs.rst new file mode 100644 index 000000000..209bdc341 --- /dev/null +++ b/docs/source/sycamore/APIs.rst @@ -0,0 +1,17 @@ +Sycamore APIs +============= + +This is the API reference for Sycamore, and it contains the functions you can use when writing Sycamore scripts to process data. If you are interested in contributing new transforms to the Sycamore project, please visit the Low-Level Transforms section in the API docs. + +.. toctree:: + :maxdepth: 1 + + ./APIs/config.rst + ./APIs/context.rst + ./APIs/docset.rst + ./APIs/docsetreader.rst + ./APIs/docsetwriter.rst + ./APIs/document.rst + ./APIs/functions.rst + ./APIs/node.rst + ./APIs/low_level_transforms.rst diff --git a/docs/source/APIs/data_preparation/config.rst b/docs/source/sycamore/APIs/config.rst similarity index 100% rename from docs/source/APIs/data_preparation/config.rst rename to docs/source/sycamore/APIs/config.rst diff --git a/docs/source/APIs/data_preparation/context.rst b/docs/source/sycamore/APIs/context.rst similarity index 100% rename from docs/source/APIs/data_preparation/context.rst rename to docs/source/sycamore/APIs/context.rst diff --git a/docs/source/APIs/data_preparation/docset.rst b/docs/source/sycamore/APIs/docset.rst similarity index 100% rename from docs/source/APIs/data_preparation/docset.rst rename to docs/source/sycamore/APIs/docset.rst diff --git a/docs/source/APIs/data_preparation/docsetreader.rst b/docs/source/sycamore/APIs/docsetreader.rst similarity index 100% rename from docs/source/APIs/data_preparation/docsetreader.rst rename to docs/source/sycamore/APIs/docsetreader.rst diff --git a/docs/source/APIs/data_preparation/docsetwriter.rst b/docs/source/sycamore/APIs/docsetwriter.rst similarity index 100% rename from 
docs/source/APIs/data_preparation/docsetwriter.rst rename to docs/source/sycamore/APIs/docsetwriter.rst diff --git a/docs/source/APIs/data_preparation/document.rst b/docs/source/sycamore/APIs/document.rst similarity index 100% rename from docs/source/APIs/data_preparation/document.rst rename to docs/source/sycamore/APIs/document.rst diff --git a/docs/source/APIs/data_preparation/functions.rst b/docs/source/sycamore/APIs/functions.rst similarity index 100% rename from docs/source/APIs/data_preparation/functions.rst rename to docs/source/sycamore/APIs/functions.rst diff --git a/docs/source/sycamore/APIs/gen b/docs/source/sycamore/APIs/gen new file mode 100755 index 000000000..5c44fe26a --- /dev/null +++ b/docs/source/sycamore/APIs/gen @@ -0,0 +1,63 @@ +#!/usr/bin/python3 + +""" +Auto-generate RST files from Python source. + +Usage: ./gen +""" + +import os +import sys +import ast + + +srcRoot = "../../../lib/sycamore/sycamore" +docRoot = "." + + +def shouldEmit(node): + if not isinstance(node, ast.ClassDef): + return False + if ast.get_docstring(node): + return True + for base in node.bases: + if base.id == "ABC": + return False # skip abstract base classes + return True + + +def doFile(name, dir, ent): + with open(f"{dir}/{ent}") as fp: + top = ast.parse(fp.read()) + + ary = [] + base = ent[:-3] + for node in top.body: # iterate module-level nodes only + if shouldEmit(node): + ary.append(f"sycamore.{name}.{base}.{node.name}") + + if ary: + with open(f"{docRoot}/{name}/{base}.rst", "w") as fp: + title = base.replace("_", " ").title() + line = "=" * len(title) + fp.write(f"{title}\n{line}\n\n") + for sym in sorted(ary): + fp.write(f".. 
autoclass:: {sym}\n :members:\n :show-inheritance:\n") + print(f" /APIs/{name}/{base}.rst") + + +def doDir(name): + dir = f"{srcRoot}/{name}" + for ent in sorted(os.listdir(dir)): + if not ent.endswith(".py"): + continue + doFile(name, dir, ent) + + +def main(): + doDir("transforms") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/source/sycamore/APIs/low_level_transforms.rst b/docs/source/sycamore/APIs/low_level_transforms.rst new file mode 100644 index 000000000..d5839879e --- /dev/null +++ b/docs/source/sycamore/APIs/low_level_transforms.rst @@ -0,0 +1,31 @@ +.. _Ref-low_level_Transforms: + +Low-Level Transforms (for Sycamore development) +=============================================== + +.. note:: + Users of Sycamore won't need to interact with these classes and should instead use the classes in the top-level API docs. These transform classes are primarily of interest to developers looking to extend Sycamore or contribute to the project. + +.. toctree:: + :maxdepth: 2 + + ./low_level_transforms/augment_text.rst + ./low_level_transforms/basics.rst + ./low_level_transforms/bbox_merge.rst + ./low_level_transforms/embed.rst + ./low_level_transforms/explode.rst + ./low_level_transforms/extract_entity.rst + ./low_level_transforms/extract_schema.rst + ./low_level_transforms/extract_table.rst + ./low_level_transforms/map.rst + ./low_level_transforms/mark_misc.rst + ./low_level_transforms/merge_elements.rst + ./low_level_transforms/partition.rst + ./low_level_transforms/query.rst + ./low_level_transforms/random_sample.rst + ./low_level_transforms/regex_replace.rst + ./low_level_transforms/sketcher.rst + ./low_level_transforms/split_elements.rst + ./low_level_transforms/spread_properties.rst + ./low_level_transforms/summarize.rst + ./low_level_transforms/summarize_images.rst diff --git a/docs/source/APIs/transforms/augment_text.rst b/docs/source/sycamore/APIs/low_level_transforms/augment_text.rst similarity index 100% rename from docs/source/APIs/transforms/augment_text.rst 
rename to docs/source/sycamore/APIs/low_level_transforms/augment_text.rst diff --git a/docs/source/APIs/transforms/basics.rst b/docs/source/sycamore/APIs/low_level_transforms/basics.rst similarity index 100% rename from docs/source/APIs/transforms/basics.rst rename to docs/source/sycamore/APIs/low_level_transforms/basics.rst diff --git a/docs/source/APIs/transforms/bbox_merge.rst b/docs/source/sycamore/APIs/low_level_transforms/bbox_merge.rst similarity index 100% rename from docs/source/APIs/transforms/bbox_merge.rst rename to docs/source/sycamore/APIs/low_level_transforms/bbox_merge.rst diff --git a/docs/source/APIs/transforms/detr_partitioner.rst b/docs/source/sycamore/APIs/low_level_transforms/detr_partitioner.rst similarity index 100% rename from docs/source/APIs/transforms/detr_partitioner.rst rename to docs/source/sycamore/APIs/low_level_transforms/detr_partitioner.rst diff --git a/docs/source/APIs/transforms/embed.rst b/docs/source/sycamore/APIs/low_level_transforms/embed.rst similarity index 100% rename from docs/source/APIs/transforms/embed.rst rename to docs/source/sycamore/APIs/low_level_transforms/embed.rst diff --git a/docs/source/APIs/transforms/explode.rst b/docs/source/sycamore/APIs/low_level_transforms/explode.rst similarity index 100% rename from docs/source/APIs/transforms/explode.rst rename to docs/source/sycamore/APIs/low_level_transforms/explode.rst diff --git a/docs/source/APIs/transforms/extract_entity.rst b/docs/source/sycamore/APIs/low_level_transforms/extract_entity.rst similarity index 100% rename from docs/source/APIs/transforms/extract_entity.rst rename to docs/source/sycamore/APIs/low_level_transforms/extract_entity.rst diff --git a/docs/source/APIs/transforms/extract_schema.rst b/docs/source/sycamore/APIs/low_level_transforms/extract_schema.rst similarity index 100% rename from docs/source/APIs/transforms/extract_schema.rst rename to docs/source/sycamore/APIs/low_level_transforms/extract_schema.rst diff --git 
a/docs/source/APIs/transforms/extract_table.rst b/docs/source/sycamore/APIs/low_level_transforms/extract_table.rst similarity index 100% rename from docs/source/APIs/transforms/extract_table.rst rename to docs/source/sycamore/APIs/low_level_transforms/extract_table.rst diff --git a/docs/source/APIs/transforms/map.rst b/docs/source/sycamore/APIs/low_level_transforms/map.rst similarity index 100% rename from docs/source/APIs/transforms/map.rst rename to docs/source/sycamore/APIs/low_level_transforms/map.rst diff --git a/docs/source/APIs/transforms/mark_misc.rst b/docs/source/sycamore/APIs/low_level_transforms/mark_misc.rst similarity index 100% rename from docs/source/APIs/transforms/mark_misc.rst rename to docs/source/sycamore/APIs/low_level_transforms/mark_misc.rst diff --git a/docs/source/APIs/transforms/merge_elements.rst b/docs/source/sycamore/APIs/low_level_transforms/merge_elements.rst similarity index 100% rename from docs/source/APIs/transforms/merge_elements.rst rename to docs/source/sycamore/APIs/low_level_transforms/merge_elements.rst diff --git a/docs/source/APIs/transforms/partition.rst b/docs/source/sycamore/APIs/low_level_transforms/partition.rst similarity index 100% rename from docs/source/APIs/transforms/partition.rst rename to docs/source/sycamore/APIs/low_level_transforms/partition.rst diff --git a/docs/source/APIs/transforms/query.rst b/docs/source/sycamore/APIs/low_level_transforms/query.rst similarity index 100% rename from docs/source/APIs/transforms/query.rst rename to docs/source/sycamore/APIs/low_level_transforms/query.rst diff --git a/docs/source/APIs/transforms/random_sample.rst b/docs/source/sycamore/APIs/low_level_transforms/random_sample.rst similarity index 100% rename from docs/source/APIs/transforms/random_sample.rst rename to docs/source/sycamore/APIs/low_level_transforms/random_sample.rst diff --git a/docs/source/APIs/transforms/regex_replace.rst b/docs/source/sycamore/APIs/low_level_transforms/regex_replace.rst similarity index 
100% rename from docs/source/APIs/transforms/regex_replace.rst rename to docs/source/sycamore/APIs/low_level_transforms/regex_replace.rst diff --git a/docs/source/APIs/transforms/sketcher.rst b/docs/source/sycamore/APIs/low_level_transforms/sketcher.rst similarity index 100% rename from docs/source/APIs/transforms/sketcher.rst rename to docs/source/sycamore/APIs/low_level_transforms/sketcher.rst diff --git a/docs/source/APIs/transforms/split_elements.rst b/docs/source/sycamore/APIs/low_level_transforms/split_elements.rst similarity index 100% rename from docs/source/APIs/transforms/split_elements.rst rename to docs/source/sycamore/APIs/low_level_transforms/split_elements.rst diff --git a/docs/source/APIs/transforms/spread_properties.rst b/docs/source/sycamore/APIs/low_level_transforms/spread_properties.rst similarity index 100% rename from docs/source/APIs/transforms/spread_properties.rst rename to docs/source/sycamore/APIs/low_level_transforms/spread_properties.rst diff --git a/docs/source/APIs/transforms/summarize.rst b/docs/source/sycamore/APIs/low_level_transforms/summarize.rst similarity index 100% rename from docs/source/APIs/transforms/summarize.rst rename to docs/source/sycamore/APIs/low_level_transforms/summarize.rst diff --git a/docs/source/APIs/transforms/summarize_images.rst b/docs/source/sycamore/APIs/low_level_transforms/summarize_images.rst similarity index 100% rename from docs/source/APIs/transforms/summarize_images.rst rename to docs/source/sycamore/APIs/low_level_transforms/summarize_images.rst diff --git a/docs/source/APIs/data_preparation/node.rst b/docs/source/sycamore/APIs/node.rst similarity index 100% rename from docs/source/APIs/data_preparation/node.rst rename to docs/source/sycamore/APIs/node.rst diff --git a/docs/source/APIs/conversation_memory.rst b/docs/source/sycamore/querying_data/using_aryn_opensearch_stack/APIs/conversation_memory.rst similarity index 100% rename from docs/source/APIs/conversation_memory.rst rename to 
docs/source/sycamore/querying_data/using_aryn_opensearch_stack/APIs/conversation_memory.rst diff --git a/docs/source/APIs/conversation_memory/functions.md b/docs/source/sycamore/querying_data/using_aryn_opensearch_stack/APIs/conversation_memory/functions.md similarity index 100% rename from docs/source/APIs/conversation_memory/functions.md rename to docs/source/sycamore/querying_data/using_aryn_opensearch_stack/APIs/conversation_memory/functions.md diff --git a/docs/source/sycamore/querying_data/using_aryn_opensearch_stack/APIs/gen b/docs/source/sycamore/querying_data/using_aryn_opensearch_stack/APIs/gen new file mode 100755 index 000000000..5c44fe26a --- /dev/null +++ b/docs/source/sycamore/querying_data/using_aryn_opensearch_stack/APIs/gen @@ -0,0 +1,63 @@ +#!/usr/bin/python3 + +""" +Auto-generate RST files from Python source. + +Usage: ./gen +""" + +import os +import sys +import ast + + +srcRoot = "../../../lib/sycamore/sycamore" +docRoot = "." + + +def shouldEmit(node): + if not isinstance(node, ast.ClassDef): + return False + if ast.get_docstring(node): + return True + for base in node.bases: + if base.id == "ABC": + return False # skip abstract base classes + return True + + +def doFile(name, dir, ent): + with open(f"{dir}/{ent}") as fp: + top = ast.parse(fp.read()) + + ary = [] + base = ent[:-3] + for node in top.body: # iterate module-level nodes only + if shouldEmit(node): + ary.append(f"sycamore.{name}.{base}.{node.name}") + + if ary: + with open(f"{docRoot}/{name}/{base}.rst", "w") as fp: + title = base.replace("_", " ").title() + line = "=" * len(title) + fp.write(f"{title}\n{line}\n\n") + for sym in sorted(ary): + fp.write(f".. 
autoclass:: {sym}\n :members:\n :show-inheritance:\n") + print(f" /APIs/{name}/{base}.rst") + + +def doDir(name): + dir = f"{srcRoot}/{name}" + for ent in sorted(os.listdir(dir)): + if not ent.endswith(".py"): + continue + doFile(name, dir, ent) + + +def main(): + doDir("transforms") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/docs/source/sycamore/tutorials.rst b/docs/source/sycamore/tutorials.rst index 835d7342d..360c1bad9 100644 --- a/docs/source/sycamore/tutorials.rst +++ b/docs/source/sycamore/tutorials.rst @@ -1,7 +1,7 @@ Tutorials ============= -Learn how to write Sycamore scrips +Learn how to write Sycamore scripts -------------------------------------- Now that you've learned about Sycamore concepts, transforms, and connectors, let's put it all together with some tutorials showing how to write Sycamore processing jobs.