Skip to content

Commit

Permalink
Address reviewer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
austintlee committed Feb 3, 2025
1 parent 192ec9d commit 7bdfd50
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 12 deletions.
16 changes: 10 additions & 6 deletions lib/sycamore/sycamore/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,12 +635,14 @@ def qdrant(self, client_params: dict, query_params: dict, **kwargs) -> DocSet:
)
return DocSet(self._context, wr)

@context_params
def aryn(self, docset_id: str, **kwargs) -> DocSet:
def aryn(self, docset_id: str, aryn_api_key: Optional[str] = None, aryn_url: Optional[str] = None, **kwargs) -> DocSet:
"""
Reads the contents of a Aryn collection into a DocSet.
Reads the contents of an Aryn docset into a DocSet.
Args:
docset_id: The ID of the Aryn docset to read from.
aryn_api_key: (Optional) The Aryn API key to use for authentication.
aryn_url: (Optional) The URL of the Aryn instance to read from.
kwargs: Keyword arguments to pass to the underlying execution engine.
"""
from sycamore.connectors.aryn.ArynReader import (
Expand All @@ -649,8 +651,10 @@ def aryn(self, docset_id: str, **kwargs) -> DocSet:
ArynQueryParams,
)

api_key = ArynConfig.get_aryn_api_key()
aryn_url = ArynConfig.get_aryn_url()
if aryn_api_key is None:
aryn_api_key = ArynConfig.get_aryn_api_key()
if aryn_url is None:
aryn_url = ArynConfig.get_aryn_url()

dr = ArynReader(client_params=ArynClientParams(aryn_url, api_key), query_params=ArynQueryParams(docset_id), **kwargs)
dr = ArynReader(client_params=ArynClientParams(aryn_url, aryn_api_key), query_params=ArynQueryParams(docset_id), **kwargs)
return DocSet(self._context, dr)
26 changes: 20 additions & 6 deletions lib/sycamore/sycamore/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,12 +807,20 @@ def json(
def aryn(
self,
docset_id: Optional[str] = None,
create_new_docset: Optional[bool] = False,
name: Optional[str] = None,
aryn_api_key: Optional[str] = None,
aryn_url: Optional[str] = None,
**kwargs,
) -> Optional["DocSet"]:
"""
Writes all documents of a DocSet to Aryn.
Args:
docset_id: The id of the docset to write to. If not provided, a new docset will be created.
create_new_docset: If true, a new docset will be created. If false, the docset with the provided id will be used.
name: The name of the new docset to create. Required if docset_id is not provided.
aryn_api_key: The API key to use for authentication. If not provided, the API key from the config file will be used.
aryn_url: The URL of the Aryn instance to write to. If not provided, the URL from the config file will be used.
"""

from sycamore.connectors.aryn.ArynWriter import (
Expand All @@ -821,16 +829,22 @@ def aryn(
ArynWriterTargetParams,
)

api_key = ArynConfig.get_aryn_api_key()
aryn_url = ArynConfig.get_aryn_url()
if docset_id is None and create_new_docset and name is not None:
if aryn_api_key is None:
aryn_api_key = ArynConfig.get_aryn_api_key()
if aryn_url is None:
aryn_url = ArynConfig.get_aryn_url()

if docset_id is None and name is None:
raise ValueError("Either docset_id or name must be provided")

if docset_id is None and name is not None:
headers = {
"Authorization": f"Bearer {api_key}"
"Authorization": f"Bearer {aryn_api_key}"
}
res = requests.post(url=f"{aryn_url}/docsets", data={"name": name}, headers=headers)
docset_id = res.json()["docset_id"]

client_params = ArynWriterClientParams(aryn_url, api_key)
client_params = ArynWriterClientParams(aryn_url, aryn_api_key)
target_params = ArynWriterTargetParams(docset_id)
ds = ArynWriter(self.plan, client_params=client_params, target_params=target_params, **kwargs)

Expand Down

0 comments on commit 7bdfd50

Please sign in to comment.