diff --git a/.github/workflows/aryn-sdk_release.yml b/.github/workflows/aryn-sdk_release.yml index 1e41e0bf8..6046d2028 100644 --- a/.github/workflows/aryn-sdk_release.yml +++ b/.github/workflows/aryn-sdk_release.yml @@ -11,7 +11,7 @@ permissions: jobs: release: name: Upload to PyPI - runs-on: blacksmith-4vcpu-ubuntu-2204 + runs-on: ubuntu-latest environment: name: PyPI url: https://pypi.org/project/aryn-sdk/ @@ -22,7 +22,7 @@ jobs: uses: actions/checkout@v4 - name: Install poetry run: pipx install poetry - - uses: useblacksmith/setup-python@v6 + - uses: actions/setup-python@v4 with: python-version: "3.11" cache: "poetry" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index cc924cc57..29d87a585 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -21,7 +21,7 @@ on: jobs: analyze: - runs-on: 'blacksmith' + runs-on: 'ubuntu-latest' permissions: # required for all workflows security-events: write diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index bcc14e5a6..59b08c6a1 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -18,7 +18,7 @@ permissions: jobs: dependency-review: - runs-on: blacksmith + runs-on: ubuntu-latest steps: - name: 'Checkout repository' uses: actions/checkout@v4 diff --git a/.github/workflows/doclint.yml b/.github/workflows/doclint.yml index 4c97b2c4f..075d7ba9e 100644 --- a/.github/workflows/doclint.yml +++ b/.github/workflows/doclint.yml @@ -9,7 +9,7 @@ on: jobs: doclint: - runs-on: blacksmith-4vcpu-ubuntu-2204 + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 diff --git a/.github/workflows/draft_release.yml b/.github/workflows/draft_release.yml index b081c32af..916f4e220 100644 --- a/.github/workflows/draft_release.yml +++ b/.github/workflows/draft_release.yml @@ -10,13 +10,13 @@ permissions: jobs: draft-release: - runs-on: blacksmith-4vcpu-ubuntu-2204 + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Install poetry run: pipx install poetry - - uses: useblacksmith/setup-python@v6 + - uses: actions/setup-python@v4 with: python-version: '3.11' cache: 'poetry' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 9bfb409e4..0cb1c7169 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,7 +11,7 @@ on: # specific. jobs: mypy: - runs-on: blacksmith-4vcpu-ubuntu-2204 + runs-on: ubuntu-latest steps: - name: DF-1 run: df -h @@ -29,7 +29,7 @@ jobs: run: df -h - name: Install poetry run: pipx install poetry - - uses: useblacksmith/setup-python@v6 + - uses: actions/setup-python@v4 with: python-version: '3.9' cache: 'poetry' @@ -45,13 +45,13 @@ jobs: poetry run mypy --install-types --non-interactive . 
lint: - runs-on: blacksmith-4vcpu-ubuntu-2204 + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Install poetry run: pipx install poetry - - uses: useblacksmith/setup-python@v6 + - uses: actions/setup-python@v4 with: python-version: '3.9' cache: 'poetry' diff --git a/.github/workflows/pypi_release.yml b/.github/workflows/pypi_release.yml index a58a18246..5d0c0f1d4 100644 --- a/.github/workflows/pypi_release.yml +++ b/.github/workflows/pypi_release.yml @@ -5,7 +5,7 @@ on: jobs: test-pypi-release: name: Upload to PyPI - runs-on: blacksmith-4vcpu-ubuntu-2204 + runs-on: ubuntu-latest environment: name: PyPI url: https://pypi.org/project/sycamore-ai/ @@ -16,7 +16,7 @@ jobs: uses: actions/checkout@v4 - name: Install poetry run: pipx install poetry - - uses: useblacksmith/setup-python@v6 + - uses: actions/setup-python@v4 with: python-version: "3.11" cache: "poetry" diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 584f2ad96..d2ba78bff 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -12,6 +12,7 @@ on: env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} SYCAMORE_S3_TEMP_PATH: s3://aryn-sycamore-integ-temp/ MODEL_SERVER_KEY: ${{ secrets.MODEL_SERVER_KEY }} @@ -30,7 +31,7 @@ permissions: jobs: sycamore-unit-tests: - runs-on: blacksmith-8vcpu-ubuntu-2204 + runs-on: ubuntu-latest strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] @@ -53,7 +54,7 @@ jobs: - name: Install poetry run: pipx install poetry - name: Set up Python ${{ matrix.python-version }} - uses: useblacksmith/setup-python@v6 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: "poetry" diff --git a/lib/sycamore/poetry.lock b/lib/sycamore/poetry.lock index 5dae06d42..80fee5f95 100644 --- a/lib/sycamore/poetry.lock +++ b/lib/sycamore/poetry.lock @@ -2459,6 +2459,22 @@ pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] requests = ["requests (>=2.20.0,<3.0.0.dev0)"] +[[package]] +name = "google-genai" +version = "1.1.0" +description = "GenAI Python SDK" +optional = true +python-versions = ">=3.9" +files = [ + {file = "google_genai-1.1.0-py3-none-any.whl", hash = "sha256:c48ac44612ad6aadc0bf96b12fa4314756baa16382c890fff793bcb53e9a9cc8"}, +] + +[package.dependencies] +google-auth = ">=2.14.1,<3.0.0dev" +pydantic = ">=2.0.0,<3.0.0dev" +requests = ">=2.28.1,<3.0.0dev" +websockets = ">=13.0,<15.0dev" + [[package]] name = "googleapis-common-protos" version = "1.66.0" @@ -9422,6 +9438,84 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] optional = ["python-socks", "wsaccel"] test = ["websockets"] +[[package]] +name = "websockets" +version = "14.2" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = true +python-versions = ">=3.9" +files = [ + {file = "websockets-14.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e8179f95323b9ab1c11723e5d91a89403903f7b001828161b480a7810b334885"}, + {file = "websockets-14.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d8c3e2cdb38f31d8bd7d9d28908005f6fa9def3324edb9bf336d7e4266fd397"}, + {file = "websockets-14.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:714a9b682deb4339d39ffa674f7b674230227d981a37d5d174a4a83e3978a610"}, + {file = 
"websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e53c72052f2596fb792a7acd9704cbc549bf70fcde8a99e899311455974ca3"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fbd68850c837e57373d95c8fe352203a512b6e49eaae4c2f4088ef8cf21980"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b27ece32f63150c268593d5fdb82819584831a83a3f5809b7521df0685cd5d8"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4daa0faea5424d8713142b33825fff03c736f781690d90652d2c8b053345b0e7"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bc63cee8596a6ec84d9753fd0fcfa0452ee12f317afe4beae6b157f0070c6c7f"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a570862c325af2111343cc9b0257b7119b904823c675b22d4ac547163088d0d"}, + {file = "websockets-14.2-cp310-cp310-win32.whl", hash = "sha256:75862126b3d2d505e895893e3deac0a9339ce750bd27b4ba515f008b5acf832d"}, + {file = "websockets-14.2-cp310-cp310-win_amd64.whl", hash = "sha256:cc45afb9c9b2dc0852d5c8b5321759cf825f82a31bfaf506b65bf4668c96f8b2"}, + {file = "websockets-14.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3bdc8c692c866ce5fefcaf07d2b55c91d6922ac397e031ef9b774e5b9ea42166"}, + {file = "websockets-14.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c93215fac5dadc63e51bcc6dceca72e72267c11def401d6668622b47675b097f"}, + {file = "websockets-14.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c9b6535c0e2cf8a6bf938064fb754aaceb1e6a4a51a80d884cd5db569886910"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a52a6d7cf6938e04e9dceb949d35fbdf58ac14deea26e685ab6368e73744e4c"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f05702e93203a6ff5226e21d9b40c037761b2cfb637187c9802c10f58e40473"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22441c81a6748a53bfcb98951d58d1af0661ab47a536af08920d129b4d1c3473"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd9b868d78b194790e6236d9cbc46d68aba4b75b22497eb4ab64fa640c3af56"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1a5a20d5843886d34ff8c57424cc65a1deda4375729cbca4cb6b3353f3ce4142"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34277a29f5303d54ec6468fb525d99c99938607bc96b8d72d675dee2b9f5bf1d"}, + {file = "websockets-14.2-cp311-cp311-win32.whl", hash = "sha256:02687db35dbc7d25fd541a602b5f8e451a238ffa033030b172ff86a93cb5dc2a"}, + {file = "websockets-14.2-cp311-cp311-win_amd64.whl", hash = "sha256:862e9967b46c07d4dcd2532e9e8e3c2825e004ffbf91a5ef9dde519ee2effb0b"}, + {file = "websockets-14.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f20522e624d7ffbdbe259c6b6a65d73c895045f76a93719aa10cd93b3de100c"}, + {file = "websockets-14.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:647b573f7d3ada919fd60e64d533409a79dcf1ea21daeb4542d1d996519ca967"}, + {file = "websockets-14.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af99a38e49f66be5a64b1e890208ad026cda49355661549c507152113049990"}, + {file = 
"websockets-14.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:091ab63dfc8cea748cc22c1db2814eadb77ccbf82829bac6b2fbe3401d548eda"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b374e8953ad477d17e4851cdc66d83fdc2db88d9e73abf755c94510ebddceb95"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a39d7eceeea35db85b85e1169011bb4321c32e673920ae9c1b6e0978590012a3"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0a6f3efd47ffd0d12080594f434faf1cd2549b31e54870b8470b28cc1d3817d9"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:065ce275e7c4ffb42cb738dd6b20726ac26ac9ad0a2a48e33ca632351a737267"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9d0e53530ba7b8b5e389c02282f9d2aa47581514bd6049d3a7cffe1385cf5fe"}, + {file = "websockets-14.2-cp312-cp312-win32.whl", hash = "sha256:20e6dd0984d7ca3037afcb4494e48c74ffb51e8013cac71cf607fffe11df7205"}, + {file = "websockets-14.2-cp312-cp312-win_amd64.whl", hash = "sha256:44bba1a956c2c9d268bdcdf234d5e5ff4c9b6dc3e300545cbe99af59dda9dcce"}, + {file = "websockets-14.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f1372e511c7409a542291bce92d6c83320e02c9cf392223272287ce55bc224e"}, + {file = "websockets-14.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4da98b72009836179bb596a92297b1a61bb5a830c0e483a7d0766d45070a08ad"}, + {file = "websockets-14.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8a86a269759026d2bde227652b87be79f8a734e582debf64c9d302faa1e9f03"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86cf1aaeca909bf6815ea714d5c5736c8d6dd3a13770e885aafe062ecbd04f1f"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b0f6c3ba3b1240f602ebb3971d45b02cc12bd1845466dd783496b3b05783a5"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c3e101c246aa85bc8534e495952e2ca208bd87994650b90a23d745902db9a"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eabdb28b972f3729348e632ab08f2a7b616c7e53d5414c12108c29972e655b20"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2066dc4cbcc19f32c12a5a0e8cc1b7ac734e5b64ac0a325ff8353451c4b15ef2"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ab95d357cd471df61873dadf66dd05dd4709cae001dd6342edafc8dc6382f307"}, + {file = "websockets-14.2-cp313-cp313-win32.whl", hash = "sha256:a9e72fb63e5f3feacdcf5b4ff53199ec8c18d66e325c34ee4c551ca748623bbc"}, + {file = "websockets-14.2-cp313-cp313-win_amd64.whl", hash = "sha256:b439ea828c4ba99bb3176dc8d9b933392a2413c0f6b149fdcba48393f573377f"}, + {file = "websockets-14.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7cd5706caec1686c5d233bc76243ff64b1c0dc445339bd538f30547e787c11fe"}, + {file = "websockets-14.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec607328ce95a2f12b595f7ae4c5d71bf502212bddcea528290b35c286932b12"}, + {file = "websockets-14.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da85651270c6bfb630136423037dd4975199e5d4114cae6d3066641adcc9d1c7"}, + {file = 
"websockets-14.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ecadc7ce90accf39903815697917643f5b7cfb73c96702318a096c00aa71f5"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1979bee04af6a78608024bad6dfcc0cc930ce819f9e10342a29a05b5320355d0"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dddacad58e2614a24938a50b85969d56f88e620e3f897b7d80ac0d8a5800258"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:89a71173caaf75fa71a09a5f614f450ba3ec84ad9fca47cb2422a860676716f0"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6af6a4b26eea4fc06c6818a6b962a952441e0e39548b44773502761ded8cc1d4"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:80c8efa38957f20bba0117b48737993643204645e9ec45512579132508477cfc"}, + {file = "websockets-14.2-cp39-cp39-win32.whl", hash = "sha256:2e20c5f517e2163d76e2729104abc42639c41cf91f7b1839295be43302713661"}, + {file = "websockets-14.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4c8cef610e8d7c70dea92e62b6814a8cd24fbd01d7103cc89308d2bfe1659ef"}, + {file = "websockets-14.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d7d9cafbccba46e768be8a8ad4635fa3eae1ffac4c6e7cb4eb276ba41297ed29"}, + {file = "websockets-14.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c76193c1c044bd1e9b3316dcc34b174bbf9664598791e6fb606d8d29000e070c"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd475a974d5352390baf865309fe37dec6831aafc3014ffac1eea99e84e83fc2"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c6c0097a41968b2e2b54ed3424739aab0b762ca92af2379f152c1aef0187e1c"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7ff794c8b36bc402f2e07c0b2ceb4a2424147ed4785ff03e2a7af03711d60a"}, + {file = "websockets-14.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dec254fcabc7bd488dab64846f588fc5b6fe0d78f641180030f8ea27b76d72c3"}, + {file = "websockets-14.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbe03eb853e17fd5b15448328b4ec7fb2407d45fb0245036d06a3af251f8e48f"}, + {file = "websockets-14.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3c4aa3428b904d5404a0ed85f3644d37e2cb25996b7f096d77caeb0e96a3b42"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:577a4cebf1ceaf0b65ffc42c54856214165fb8ceeba3935852fc33f6b0c55e7f"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad1c1d02357b7665e700eca43a31d52814ad9ad9b89b58118bdabc365454b574"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f390024a47d904613577df83ba700bd189eedc09c57af0a904e5c39624621270"}, + {file = "websockets-14.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3c1426c021c38cf92b453cdf371228d3430acd775edee6bac5a4d577efc72365"}, + {file = "websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b"}, + {file = "websockets-14.2.tar.gz", hash = 
"sha256:5059ed9c54945efb321f097084b4c7e52c246f2c869815876a69d1efc4ad6eb5"}, +] + [[package]] name = "wget" version = "3.2" @@ -9849,6 +9943,7 @@ docs = ["furo", "myst-parser", "sphinx"] duckdb = ["duckdb"] elasticsearch = ["elasticsearch"] eval = ["apted", "datasets", "rouge"] +google-genai = ["google-genai"] legacy-partitioners = ["nltk", "python-pptx", "unstructured"] local-inference = ["easyocr", "paddleocr", "pdfminer-six", "pytesseract", "sentence-transformers", "timm", "torch", "torchvision", "transformers"] neo4j = ["neo4j"] @@ -9860,4 +9955,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "fda09c7256e9192143850b2748859c429b88eb8d2db0a957d489a7abe954409d" +content-hash = "d8ce4686a3c23cb77033c0c0d64402451de6ef501d04083fcd3deeafc1fc0897" diff --git a/lib/sycamore/pyproject.toml b/lib/sycamore/pyproject.toml index 8594f247d..1e37404bd 100644 --- a/lib/sycamore/pyproject.toml +++ b/lib/sycamore/pyproject.toml @@ -88,6 +88,7 @@ nltk = { version = "^3.9.0", optional = true } # LLM Dependencies anthropic = { version = "^0.42.0", optional = true } +google-genai = {version = "^1.0", optional = true} tiktoken = "^0.8.0" [tool.poetry.group.test.dependencies] @@ -143,6 +144,7 @@ legacy-partitioners = ["unstructured", "python-pptx", "nltk"] # LLM extras anthropic = ["anthropic"] +google-genai = ["google-genai"] [tool.ruff] line-length = 120 diff --git a/lib/sycamore/sycamore/llms/__init__.py b/lib/sycamore/sycamore/llms/__init__.py index 8515fe4bb..a05c3f752 100644 --- a/lib/sycamore/sycamore/llms/__init__.py +++ b/lib/sycamore/sycamore/llms/__init__.py @@ -4,6 +4,7 @@ from sycamore.llms.openai import OpenAI, OpenAIClientType, OpenAIModels, OpenAIClientParameters, OpenAIClientWrapper from sycamore.llms.bedrock import Bedrock, BedrockModels from sycamore.llms.anthropic import Anthropic, AnthropicModels +from sycamore.llms.gemini import Gemini, GeminiModels # Register the model constructors. MODELS: Dict[str, Callable[..., LLM]] = {} @@ -16,6 +17,7 @@ MODELS.update( {f"anthropic.{model.value}": lambda **kwargs: Anthropic(model.value, **kwargs) for model in AnthropicModels} ) +MODELS.update({f"gemini.{model.value}": lambda **kwargs: Gemini(model.value.name, **kwargs) for model in GeminiModels}) def get_llm(model_name: str) -> Callable[..., LLM]: @@ -38,4 +40,6 @@ def get_llm(model_name: str) -> Callable[..., LLM]: "BedrockModels", "Anthropic", "AnthropicModels", + "Gemini", + "GeminiModels", ] diff --git a/lib/sycamore/sycamore/llms/gemini.py b/lib/sycamore/sycamore/llms/gemini.py new file mode 100644 index 000000000..b4dc14607 --- /dev/null +++ b/lib/sycamore/sycamore/llms/gemini.py @@ -0,0 +1,136 @@ +from dataclasses import dataclass +import datetime +from enum import Enum +from typing import Any, Optional, Union +import os + +from sycamore.llms.llms import LLM +from sycamore.llms.prompts.prompts import RenderedPrompt +from sycamore.utils.cache import Cache +from sycamore.utils.import_utils import requires_modules + +DEFAULT_MAX_TOKENS = 1024 + + +@dataclass +class GeminiModel: + name: str + is_chat: bool = False + + +class GeminiModels(Enum): + """Represents available Gemini models. More info: https://googleapis.github.io/python-genai/""" + + # Note that the models available on a given Gemini account may vary. 
diff --git a/lib/sycamore/sycamore/llms/gemini.py b/lib/sycamore/sycamore/llms/gemini.py
new file mode 100644
index 000000000..b4dc14607
--- /dev/null
+++ b/lib/sycamore/sycamore/llms/gemini.py
@@ -0,0 +1,139 @@
+from dataclasses import dataclass
+import datetime
+import io
+from enum import Enum
+from typing import Any, Optional, Union
+import os
+
+from sycamore.llms.llms import LLM
+from sycamore.llms.prompts.prompts import RenderedPrompt
+from sycamore.utils.cache import Cache
+from sycamore.utils.import_utils import requires_modules
+
+DEFAULT_MAX_TOKENS = 1024
+
+
+@dataclass
+class GeminiModel:
+    name: str
+    is_chat: bool = False
+
+
+class GeminiModels(Enum):
+    """Represents available Gemini models. More info: https://googleapis.github.io/python-genai/"""
+
+    # Note that the models available on a given Gemini account may vary.
+    GEMINI_2_FLASH = GeminiModel(name="gemini-2.0-flash-exp", is_chat=True)
+    GEMINI_2_FLASH_LITE = GeminiModel(name="gemini-2.0-flash-lite-preview-02-05", is_chat=True)
+    GEMINI_2_FLASH_THINKING = GeminiModel(name="gemini-2.0-flash-thinking-exp", is_chat=True)
+    GEMINI_2_PRO = GeminiModel(name="gemini-2.0-pro-exp", is_chat=True)
+
+    @classmethod
+    def from_name(cls, name: str):
+        for m in iter(cls):
+            if m.value.name == name:
+                return m
+        return None
+
+
+class Gemini(LLM):
+    """This is an LLM implementation that uses the Google Gemini API to generate text.
+
+    Args:
+        model_name: The name of the Gemini model to use.
+        cache: A cache object to use for caching results.
+    """
+
+    @requires_modules("google.genai", extra="google-genai")
+    def __init__(
+        self,
+        model_name: Union[GeminiModels, str],
+        cache: Optional[Cache] = None,
+        api_key: Optional[str] = None,
+    ):
+        from google.genai import Client
+
+        self.model_name = model_name
+
+        if isinstance(model_name, GeminiModels):
+            self.model = model_name.value
+        elif isinstance(model_name, str):
+            self.model = GeminiModel(name=model_name)
+        api_key = api_key if api_key else os.getenv("GEMINI_API_KEY")
+        self._client = Client(api_key=api_key)
+        super().__init__(self.model.name, cache)
+
+    def __reduce__(self):
+        def deserializer(kwargs):
+            return Gemini(**kwargs)
+
+        kwargs = {"model_name": self.model_name, "cache": self._cache}
+        return deserializer, (kwargs,)
+
+    def is_chat_mode(self) -> bool:
+        """Returns True if the LLM is in chat mode, False otherwise."""
+        return True
+
+    def get_generate_kwargs(self, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> dict:
+        from google.genai import types
+
+        kwargs: dict[str, Any] = {}
+        config = {
+            "temperature": 0,
+            "candidate_count": 1,
+            **(llm_kwargs or {}),
+        }
+        config["max_output_tokens"] = config.get("max_output_tokens", DEFAULT_MAX_TOKENS)
+        if prompt.response_format:
+            config["response_mime_type"] = "application/json"
+            config["response_schema"] = prompt.response_format
+        content_list = []
+        for message in prompt.messages:
+            if message.role == "system":
+                config["system_instruction"] = message.content
+                continue
+            role = "model" if message.role == "assistant" else "user"
+            content = types.Content(parts=[types.Part.from_text(text=message.content)], role=role)
+            if message.images:
+                for image in message.images:
+                    # Encode the PIL image as PNG bytes before attaching it as an inline part.
+                    buffered = io.BytesIO()
+                    image.convert("RGB").save(buffered, format="PNG")
+                    content.parts.append(types.Part.from_bytes(data=buffered.getvalue(), mime_type="image/png"))
+            content_list.append(content)
+        kwargs["config"] = None
+        if config:
+            kwargs["config"] = types.GenerateContentConfig(**config)
+        kwargs["content"] = content_list
+        return kwargs
+
+    def generate_metadata(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> dict:
+        ret = self._llm_cache_get(prompt, llm_kwargs)
+        if isinstance(ret, dict):
+            return ret
+        assert ret is None
+
+        kwargs = self.get_generate_kwargs(prompt, llm_kwargs)
+
+        start = datetime.datetime.now()
+        response = self._client.models.generate_content(
+            model=self.model.name, contents=kwargs["content"], config=kwargs["config"]
+        )
+        wall_latency = datetime.datetime.now() - start
+        md = response.usage_metadata
+        in_tokens = int(md.prompt_token_count) if md and md.prompt_token_count else 0
+        out_tokens = int(md.candidates_token_count) if md and md.candidates_token_count else 0
+        output = " ".join(part.text if part.text else "" for part in response.candidates[0].content.parts)
+        ret = {
+            "output": output,
+            "wall_latency": wall_latency,
+            "in_tokens": in_tokens,
+            "out_tokens": out_tokens,
+        }
+
+        self.add_llm_metadata(kwargs, output, wall_latency, in_tokens, out_tokens)
+        self._llm_cache_set(prompt, llm_kwargs, ret)
+        return ret
+
+    def generate(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> str:
+        d = self.generate_metadata(prompt=prompt, llm_kwargs=llm_kwargs)
+        return d["output"]
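The response_format branch of get_generate_kwargs maps structured-output requests onto Gemini's JSON mode. A rough sketch of how that path might be exercised, assuming RenderedPrompt accepts a Pydantic model class as its response_format (google-genai accepts such a class as response_schema):

    from pydantic import BaseModel

    from sycamore.llms import Gemini, GeminiModels
    from sycamore.llms.prompts.prompts import RenderedMessage, RenderedPrompt

    class Limerick(BaseModel):
        title: str
        lines: list[str]

    llm = Gemini(GeminiModels.GEMINI_2_FLASH)
    prompt = RenderedPrompt(
        messages=[RenderedMessage(role="user", content="Write a limerick about large language models.")],
        response_format=Limerick,
    )
    # The request goes out with response_mime_type="application/json" and the Limerick
    # schema; generate() returns the raw JSON string produced by the model.
    print(llm.generate(prompt=prompt, llm_kwargs={}))
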
diff --git a/lib/sycamore/sycamore/tests/integration/llms/test_gemini.py b/lib/sycamore/sycamore/tests/integration/llms/test_gemini.py
new file mode 100644
index 000000000..1994ca447
--- /dev/null
+++ b/lib/sycamore/sycamore/tests/integration/llms/test_gemini.py
@@ -0,0 +1,153 @@
+from pathlib import Path
+from typing import Any
+import base64
+import pickle
+
+from sycamore.llms import Gemini, GeminiModels
+from sycamore.llms.prompts.prompts import RenderedPrompt, RenderedMessage
+from sycamore.utils.cache import DiskCache
+
+
+def cacheget(cache: DiskCache, key: str):
+    hit = cache.get(key)
+    return pickle.loads(base64.b64decode(hit))  # type: ignore
+
+
+def cacheset(cache: DiskCache, key: str, data: Any):
+    databytes = pickle.dumps(data)
+    cache.set(key, base64.b64encode(databytes).decode("utf-8"))
+
+
+def test_gemini_defaults():
+    llm = Gemini(GeminiModels.GEMINI_2_FLASH)
+    prompt = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a limerick about large language models.")]
+    )
+
+    res = llm.generate(prompt=prompt, llm_kwargs={})
+
+    assert len(res) > 0
+
+
+def test_gemini_messages_defaults():
+    llm = Gemini(GeminiModels.GEMINI_2_FLASH)
+    messages = [
+        RenderedMessage(
+            role="user",
+            content="Write a caption for a recent trip to a sunny beach",
+        ),
+    ]
+    prompt = RenderedPrompt(messages=messages)
+
+    res = llm.generate(prompt=prompt, llm_kwargs={})
+
+    assert len(res) > 0
+
+
+def test_cached_gemini(tmp_path: Path):
+    cache = DiskCache(str(tmp_path))
+    llm = Gemini(GeminiModels.GEMINI_2_FLASH, cache=cache)
+    prompt = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a limerick about large language models.")]
+    )
+
+    # pylint: disable=protected-access
+    key = llm._llm_cache_key(prompt, {})
+
+    res = llm.generate(prompt=prompt, llm_kwargs={})
+
+    # assert result is cached
+    assert cacheget(cache, key).get("result")["output"] == res
+    assert cacheget(cache, key).get("prompt") == prompt
+    assert cacheget(cache, key).get("prompt.response_format") is None
+    assert cacheget(cache, key).get("llm_kwargs") == {}
+    assert cacheget(cache, key).get("model_name") == GeminiModels.GEMINI_2_FLASH.value.name
+
+    # assert llm.generate is using cached result
+    custom_output: dict[str, Any] = {
+        "result": {"output": "This is a custom response"},
+        "prompt": prompt,
+        "prompt.response_format": None,
+        "llm_kwargs": {},
+        "model_name": GeminiModels.GEMINI_2_FLASH.value.name,
+    }
+    cacheset(cache, key, custom_output)
+
+    assert llm.generate(prompt=prompt, llm_kwargs={}) == custom_output["result"]["output"]
+
+
+def test_cached_gemini_different_prompts(tmp_path: Path):
+    cache = DiskCache(str(tmp_path))
+    llm = Gemini(GeminiModels.GEMINI_2_FLASH, cache=cache)
+    prompt_1 = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a limerick about large language models.")]
+    )
+    prompt_2 = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a short limerick about large language models.")]
+    )
+    prompt_3 = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a poem about large language models.")]
+    )
+    prompt_4 = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a short poem about large language models.")]
+    )
+
+    key_1 = llm._llm_cache_key(prompt_1, {})
+    key_2 = llm._llm_cache_key(prompt_2, {})
+    key_3 = llm._llm_cache_key(prompt_3, {})
+    key_4 = llm._llm_cache_key(prompt_4, {})
+    keys = [key_1, key_2, key_3, key_4]
+
+    assert len(keys) == len(
+        set(keys)
+    ), f"""
+    Cached query keys are not unique:
+    key_1: {key_1}
+    key_2: {key_2}
+    key_3: {key_3}
+    key_4: {key_4}
+    """
+
+
+def test_cached_gemini_different_models(tmp_path: Path):
+    cache = DiskCache(str(tmp_path))
+    llm_FLASH = Gemini(GeminiModels.GEMINI_2_FLASH, cache=cache)
+    llm_FLASH_LITE = Gemini(GeminiModels.GEMINI_2_FLASH_LITE, cache=cache)
+
+    prompt = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a limerick about large language models.")]
+    )
+
+    # populate cache
+    key_FLASH = llm_FLASH._llm_cache_key(prompt, {})
+    res_FLASH = llm_FLASH.generate(prompt=prompt, llm_kwargs={})
+    key_FLASH_LITE = llm_FLASH_LITE._llm_cache_key(prompt, {})
+    res_FLASH_LITE = llm_FLASH_LITE.generate(prompt=prompt, llm_kwargs={})
+
+    # check proper cached results
+    assert cacheget(cache, key_FLASH_LITE).get("result")["output"] == res_FLASH_LITE
+    assert cacheget(cache, key_FLASH_LITE).get("prompt") == prompt
+    assert cacheget(cache, key_FLASH_LITE).get("llm_kwargs") == {}
+    assert cacheget(cache, key_FLASH_LITE).get("model_name") == GeminiModels.GEMINI_2_FLASH_LITE.value.name
+    assert cacheget(cache, key_FLASH).get("result")["output"] == res_FLASH
+    assert cacheget(cache, key_FLASH).get("prompt") == prompt
+    assert cacheget(cache, key_FLASH).get("llm_kwargs") == {}
+    assert cacheget(cache, key_FLASH).get("model_name") == GeminiModels.GEMINI_2_FLASH.value.name
+
+    # check for difference with model change
+    assert key_FLASH != key_FLASH_LITE
+    assert res_FLASH != res_FLASH_LITE
+
+
+def test_metadata():
+    llm = Gemini(GeminiModels.GEMINI_2_FLASH)
+    prompt = RenderedPrompt(
+        messages=[RenderedMessage(role="user", content="Write a limerick about large language models.")]
+    )
+
+    res = llm.generate_metadata(prompt=prompt, llm_kwargs={})
+
+    assert "output" in res
+    assert "wall_latency" in res
+    assert "in_tokens" in res
+    assert "out_tokens" in res
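The integration tests above cover text prompts and caching; the image path of get_generate_kwargs is what the summarize_images change below relies on. Called directly, it looks roughly like this (a sketch: the image path is a placeholder, and RenderedMessage.images is assumed to take PIL images, as it does in summarize_image below):

    from PIL import Image

    from sycamore.llms import Gemini, GeminiModels
    from sycamore.llms.prompts.prompts import RenderedMessage, RenderedPrompt

    llm = Gemini(GeminiModels.GEMINI_2_FLASH)
    image = Image.open("figure.png")  # placeholder path
    prompt = RenderedPrompt(
        messages=[RenderedMessage(role="user", content="Describe this figure.", images=[image])]
    )
    print(llm.generate(prompt=prompt, llm_kwargs={}))
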
diff --git a/lib/sycamore/sycamore/transforms/summarize_images.py b/lib/sycamore/sycamore/transforms/summarize_images.py
index 7be07fb40..dfe237211 100644
--- a/lib/sycamore/sycamore/transforms/summarize_images.py
+++ b/lib/sycamore/sycamore/transforms/summarize_images.py
@@ -1,10 +1,11 @@
 from typing import Optional
 
+from PIL import Image
 from sycamore.data import Document, Element
-from sycamore.llms.openai import LLM, OpenAI, OpenAIClientWrapper, OpenAIModels
+from sycamore.llms import LLM, OpenAI, OpenAIClientWrapper, OpenAIModels, Gemini, GeminiModels
 from sycamore.llms.prompts.default_prompts import SummarizeImagesJinjaPrompt
-from sycamore.llms.prompts.prompts import SycamorePrompt
+from sycamore.llms.prompts.prompts import SycamorePrompt, RenderedMessage, RenderedPrompt
 from sycamore.plan_nodes import Node
 from sycamore.transforms.base import CompositeTransform
 from sycamore.transforms.base_llm import LLMMapElements
@@ -81,6 +82,23 @@ def __init__(self, llm: LLM, prompt: Optional[str] = None, include_context: bool
         self.prompt = prompt
         self.include_context = include_context
+
+    def summarize_image(self, image: Image.Image, context: Optional[str]) -> str:
+        """Summarize the image using the LLM. Convenience helper for summarizing a single image directly.
+
+        Args:
+            image: The image to summarize.
+            context: The context to use for summarization.
+
+        Returns:
+            The summarized image as a string.
+        """
+        messages = []
+        if context is not None:
+            messages = [RenderedMessage(role="system", content=context)]
+        messages.append(RenderedMessage(role="user", content=self.prompt, images=[image]))
+
+        return self.llm.generate(prompt=RenderedPrompt(messages=messages))
 
 
 class OpenAIImageSummarizer(LLMImageSummarizer):
     """Implementation of the LLMImageSummarizer for OpenAI models.
@@ -110,6 +128,28 @@ def __init__(
         super().__init__(llm=openai, prompt=prompt, include_context=include_context)
+
+class GeminiImageSummarizer(LLMImageSummarizer):
+    """Implementation of the LLMImageSummarizer for Gemini models.
+
+    Args:
+        gemini_model: The Gemini instance to use. If not set, one will be created.
+        prompt: The prompt to use to pass to the model, as a string.
+        include_context: Whether to include the immediately preceding and following text elements as context.
+    """
+
+    model = GeminiModels.GEMINI_2_FLASH
+
+    def __init__(
+        self,
+        gemini_model: Optional[Gemini] = None,
+        prompt: Optional[str] = None,
+        include_context: bool = True,
+    ):
+        if gemini_model is None:
+            gemini_model = Gemini(model_name=self.model)
+        super().__init__(llm=gemini_model, prompt=prompt, include_context=include_context)
+
 
 class SummarizeImages(CompositeTransform):
     """SummarizeImages is a transform for summarizing context into text using an LLM.
diff --git a/poetry.lock b/poetry.lock
index 26fb92ed7..5fc4c37ab 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2735,6 +2735,23 @@ files = [
 [package.extras]
 testing = ["pytest"]
 
+[[package]]
+name = "google-genai"
+version = "1.2.0"
+description = "GenAI Python SDK"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "google_genai-1.2.0-py3-none-any.whl", hash = "sha256:609d61bee73f1a6ae5b47e9c7dd4b469d50318f050c5ceacf835b0f80f79d2d9"},
+]
+
+[package.dependencies]
+google-auth = ">=2.14.1,<3.0.0dev"
+pydantic = ">=2.0.0,<3.0.0dev"
+requests = ">=2.28.1,<3.0.0dev"
+typing-extensions = ">=4.11.0,<5.0.0dev"
+websockets = ">=13.0,<15.0dev"
+
 [[package]]
 name = "google-resumable-media"
 version = "2.7.2"
@@ -8668,6 +8685,7 @@ elasticsearch = {version = "8.14.0", optional = true}
 fasteners = "^0.19"
 fsspec = "2024.2.0"
 furo = {version = "^2023.9.10", optional = true}
+google-genai = {version = "^1.0", optional = true}
 jinja2 = "^3.1.5"
 myst-parser = {version = "^2.0.0", optional = true}
 nanoid = "^2.0.0"
@@ -8714,6 +8732,7 @@ docs = ["furo (>=2023.9.10,<2024.0.0)", "myst-parser (>=2.0.0,<3.0.0)", "sphinx
 duckdb = ["duckdb (>=1.1.1,<2.0.0)"]
 elasticsearch = ["elasticsearch (==8.14.0)"]
 eval = ["apted (>=1.0.3,<2.0.0)", "datasets (>=2.16.1,<3.0.0)", "rouge (>=1.0.1,<2.0.0)"]
+google-genai = ["google-genai (>=1.0,<2.0)"]
 legacy-partitioners = ["nltk (>=3.9.0,<4.0.0)", "python-pptx (>=0.6.22,<0.7.0)", "unstructured (==0.10.20)"]
 local-inference = ["easyocr (>=1.7.1,<2.0.0)", "paddleocr (>=2.8.1,<3.0.0)", "pdfminer-six (==20221105)", "pytesseract (>=0.3.10,<0.4.0)", "sentence-transformers (>=3.0.1,<4.0.0)", "timm (>=0.9.12,<0.10.0)", "torch (>=2.3.0,<3.0.0)", "torchvision (>=0.18.1,<0.19.0)", "transformers (>=4.43.1,<5.0.0)"]
 neo4j = ["neo4j (>=5.21.0,<6.0.0)"]
@@ -9747,6 +9766,84 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"]
 optional = ["python-socks", "wsaccel"]
 test = ["websockets"]
 
+[[package]]
+name = "websockets"
+version = "14.2"
+description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
+optional = false
+python-versions = ">=3.9" +files = [ + {file = "websockets-14.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e8179f95323b9ab1c11723e5d91a89403903f7b001828161b480a7810b334885"}, + {file = "websockets-14.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d8c3e2cdb38f31d8bd7d9d28908005f6fa9def3324edb9bf336d7e4266fd397"}, + {file = "websockets-14.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:714a9b682deb4339d39ffa674f7b674230227d981a37d5d174a4a83e3978a610"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e53c72052f2596fb792a7acd9704cbc549bf70fcde8a99e899311455974ca3"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fbd68850c837e57373d95c8fe352203a512b6e49eaae4c2f4088ef8cf21980"}, + {file = "websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b27ece32f63150c268593d5fdb82819584831a83a3f5809b7521df0685cd5d8"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4daa0faea5424d8713142b33825fff03c736f781690d90652d2c8b053345b0e7"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bc63cee8596a6ec84d9753fd0fcfa0452ee12f317afe4beae6b157f0070c6c7f"}, + {file = "websockets-14.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a570862c325af2111343cc9b0257b7119b904823c675b22d4ac547163088d0d"}, + {file = "websockets-14.2-cp310-cp310-win32.whl", hash = "sha256:75862126b3d2d505e895893e3deac0a9339ce750bd27b4ba515f008b5acf832d"}, + {file = "websockets-14.2-cp310-cp310-win_amd64.whl", hash = "sha256:cc45afb9c9b2dc0852d5c8b5321759cf825f82a31bfaf506b65bf4668c96f8b2"}, + {file = "websockets-14.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3bdc8c692c866ce5fefcaf07d2b55c91d6922ac397e031ef9b774e5b9ea42166"}, + {file = "websockets-14.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c93215fac5dadc63e51bcc6dceca72e72267c11def401d6668622b47675b097f"}, + {file = "websockets-14.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c9b6535c0e2cf8a6bf938064fb754aaceb1e6a4a51a80d884cd5db569886910"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a52a6d7cf6938e04e9dceb949d35fbdf58ac14deea26e685ab6368e73744e4c"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f05702e93203a6ff5226e21d9b40c037761b2cfb637187c9802c10f58e40473"}, + {file = "websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22441c81a6748a53bfcb98951d58d1af0661ab47a536af08920d129b4d1c3473"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd9b868d78b194790e6236d9cbc46d68aba4b75b22497eb4ab64fa640c3af56"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1a5a20d5843886d34ff8c57424cc65a1deda4375729cbca4cb6b3353f3ce4142"}, + {file = "websockets-14.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34277a29f5303d54ec6468fb525d99c99938607bc96b8d72d675dee2b9f5bf1d"}, + {file = "websockets-14.2-cp311-cp311-win32.whl", hash = "sha256:02687db35dbc7d25fd541a602b5f8e451a238ffa033030b172ff86a93cb5dc2a"}, + {file = "websockets-14.2-cp311-cp311-win_amd64.whl", hash = "sha256:862e9967b46c07d4dcd2532e9e8e3c2825e004ffbf91a5ef9dde519ee2effb0b"}, + {file = 
"websockets-14.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f20522e624d7ffbdbe259c6b6a65d73c895045f76a93719aa10cd93b3de100c"}, + {file = "websockets-14.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:647b573f7d3ada919fd60e64d533409a79dcf1ea21daeb4542d1d996519ca967"}, + {file = "websockets-14.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af99a38e49f66be5a64b1e890208ad026cda49355661549c507152113049990"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:091ab63dfc8cea748cc22c1db2814eadb77ccbf82829bac6b2fbe3401d548eda"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b374e8953ad477d17e4851cdc66d83fdc2db88d9e73abf755c94510ebddceb95"}, + {file = "websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a39d7eceeea35db85b85e1169011bb4321c32e673920ae9c1b6e0978590012a3"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0a6f3efd47ffd0d12080594f434faf1cd2549b31e54870b8470b28cc1d3817d9"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:065ce275e7c4ffb42cb738dd6b20726ac26ac9ad0a2a48e33ca632351a737267"}, + {file = "websockets-14.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9d0e53530ba7b8b5e389c02282f9d2aa47581514bd6049d3a7cffe1385cf5fe"}, + {file = "websockets-14.2-cp312-cp312-win32.whl", hash = "sha256:20e6dd0984d7ca3037afcb4494e48c74ffb51e8013cac71cf607fffe11df7205"}, + {file = "websockets-14.2-cp312-cp312-win_amd64.whl", hash = "sha256:44bba1a956c2c9d268bdcdf234d5e5ff4c9b6dc3e300545cbe99af59dda9dcce"}, + {file = "websockets-14.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f1372e511c7409a542291bce92d6c83320e02c9cf392223272287ce55bc224e"}, + {file = "websockets-14.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4da98b72009836179bb596a92297b1a61bb5a830c0e483a7d0766d45070a08ad"}, + {file = "websockets-14.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8a86a269759026d2bde227652b87be79f8a734e582debf64c9d302faa1e9f03"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86cf1aaeca909bf6815ea714d5c5736c8d6dd3a13770e885aafe062ecbd04f1f"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b0f6c3ba3b1240f602ebb3971d45b02cc12bd1845466dd783496b3b05783a5"}, + {file = "websockets-14.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c3e101c246aa85bc8534e495952e2ca208bd87994650b90a23d745902db9a"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eabdb28b972f3729348e632ab08f2a7b616c7e53d5414c12108c29972e655b20"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2066dc4cbcc19f32c12a5a0e8cc1b7ac734e5b64ac0a325ff8353451c4b15ef2"}, + {file = "websockets-14.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ab95d357cd471df61873dadf66dd05dd4709cae001dd6342edafc8dc6382f307"}, + {file = "websockets-14.2-cp313-cp313-win32.whl", hash = "sha256:a9e72fb63e5f3feacdcf5b4ff53199ec8c18d66e325c34ee4c551ca748623bbc"}, + {file = "websockets-14.2-cp313-cp313-win_amd64.whl", hash = "sha256:b439ea828c4ba99bb3176dc8d9b933392a2413c0f6b149fdcba48393f573377f"}, + {file = "websockets-14.2-cp39-cp39-macosx_10_9_universal2.whl", 
hash = "sha256:7cd5706caec1686c5d233bc76243ff64b1c0dc445339bd538f30547e787c11fe"}, + {file = "websockets-14.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec607328ce95a2f12b595f7ae4c5d71bf502212bddcea528290b35c286932b12"}, + {file = "websockets-14.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da85651270c6bfb630136423037dd4975199e5d4114cae6d3066641adcc9d1c7"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ecadc7ce90accf39903815697917643f5b7cfb73c96702318a096c00aa71f5"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1979bee04af6a78608024bad6dfcc0cc930ce819f9e10342a29a05b5320355d0"}, + {file = "websockets-14.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dddacad58e2614a24938a50b85969d56f88e620e3f897b7d80ac0d8a5800258"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:89a71173caaf75fa71a09a5f614f450ba3ec84ad9fca47cb2422a860676716f0"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6af6a4b26eea4fc06c6818a6b962a952441e0e39548b44773502761ded8cc1d4"}, + {file = "websockets-14.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:80c8efa38957f20bba0117b48737993643204645e9ec45512579132508477cfc"}, + {file = "websockets-14.2-cp39-cp39-win32.whl", hash = "sha256:2e20c5f517e2163d76e2729104abc42639c41cf91f7b1839295be43302713661"}, + {file = "websockets-14.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4c8cef610e8d7c70dea92e62b6814a8cd24fbd01d7103cc89308d2bfe1659ef"}, + {file = "websockets-14.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d7d9cafbccba46e768be8a8ad4635fa3eae1ffac4c6e7cb4eb276ba41297ed29"}, + {file = "websockets-14.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c76193c1c044bd1e9b3316dcc34b174bbf9664598791e6fb606d8d29000e070c"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd475a974d5352390baf865309fe37dec6831aafc3014ffac1eea99e84e83fc2"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c6c0097a41968b2e2b54ed3424739aab0b762ca92af2379f152c1aef0187e1c"}, + {file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7ff794c8b36bc402f2e07c0b2ceb4a2424147ed4785ff03e2a7af03711d60a"}, + {file = "websockets-14.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dec254fcabc7bd488dab64846f588fc5b6fe0d78f641180030f8ea27b76d72c3"}, + {file = "websockets-14.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbe03eb853e17fd5b15448328b4ec7fb2407d45fb0245036d06a3af251f8e48f"}, + {file = "websockets-14.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3c4aa3428b904d5404a0ed85f3644d37e2cb25996b7f096d77caeb0e96a3b42"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:577a4cebf1ceaf0b65ffc42c54856214165fb8ceeba3935852fc33f6b0c55e7f"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad1c1d02357b7665e700eca43a31d52814ad9ad9b89b58118bdabc365454b574"}, + {file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f390024a47d904613577df83ba700bd189eedc09c57af0a904e5c39624621270"}, + {file = "websockets-14.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3c1426c021c38cf92b453cdf371228d3430acd775edee6bac5a4d577efc72365"}, + {file = "websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b"}, + {file = "websockets-14.2.tar.gz", hash = "sha256:5059ed9c54945efb321f097084b4c7e52c246f2c869815876a69d1efc4ad6eb5"}, +] + [[package]] name = "wget" version = "3.2" @@ -10174,4 +10271,4 @@ docs = ["furo", "myst-parser", "sphinx"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" -content-hash = "3d009dae5d60345a770686b4bb252378c4be106e06b0050f07200e8d3d63daef" +content-hash = "0768fecd2fa721235599a3bb1c07d636b71f7da568ccee6b09e816a54337a4e3" diff --git a/pyproject.toml b/pyproject.toml index 1434984cb..fad9d629c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ pytest = "^7.4" pytest-mock = "^3.11" [tool.poetry.group.dev.dependencies] -sycamore-ai = { path = "lib/sycamore", develop = true, extras=["eval", "duckdb", "elasticsearch", "neo4j", "opensearch", "pinecone", "weaviate", "local-inference", "legacy-partitioners", "anthropic"] } +sycamore-ai = { path = "lib/sycamore", develop = true, extras=["eval", "duckdb", "elasticsearch", "neo4j", "opensearch", "pinecone", "weaviate", "local-inference", "legacy-partitioners", "anthropic", "google-genai"] } remote-processors = { path = "lib/remote-processors", develop = true } integration = { path = "apps/integration", develop = true } sycamore-poetry-lock = { path = "lib/poetry-lock", develop = true }