diff --git a/.vscode/settings.json b/.vscode/settings.json index 2127f57..bdd09f3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,13 +1,14 @@ { - "python.formatting.provider": "black", - "python.linting.enabled": true, - "python.linting.flake8Enabled": true, - "python.linting.flake8Args": [ - "--max-line-length=88" - ], + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + }, + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, "python.testing.pytestArgs": [ "tests" ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true, + "flake8.args": [ + "--max-line-length=120" + ] } \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 065ddde..4e4f1b5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,7 +6,16 @@ To try this out locally: - `source .venv/bin/activate` to use that virtual environment. - `poetry install` to install project dependencies. -VSCode is recommended for development. You can try [these instructions](https://www.pythoncheatsheet.org/blog/python-projects-with-poetry-and-vscode-part-1) +VSCode is recommended for development. +- For formatting, the project uses the [Black](https://github.com/psf/black) code formatter, and the +[Black Formatter VSCode extension](https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter) +is recommended. +- For linting, the project uses the [Flake8](https://flake8.pycqa.org/en/latest/) linter and the +[Flake8 extension](https://marketplace.visualstudio.com/items?itemName=ms-python.flake8) is recommended. +These tools are included in the project by pyproject.toml and the settings are in .vscode/settings.json. + +You can also get additional information at +[these instructions](https://www.pythoncheatsheet.org/blog/python-projects-with-poetry-and-vscode-part-1) for getting setup in VSCode with linting and formatting enabled. ## Running the tests @@ -67,6 +76,18 @@ project's documentation: python -i shell/docs.py + +## Testing updates in a different local project +If you are using this in another project and making changes for it, use the following command to make the +changes to this local project immediately reflected in a dependent project: +```poetry add /marklogic-python-client/``` + +Using this method will allow you to very easily test changes to this project, in a different local project. + +Keep in mind that you probably do not want to check that version of the pyproject.toml file into version +control since it is only useful locally. + + ## Testing the documentation locally The docs for this project are stored in the `./docs` directory as a set of Markdown files. These are published via diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000..22f33da --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,39 @@ +@Library('shared-libraries') _ +pipeline{ + agent none; + environment{ + JAVA_HOME_DIR="/home/builder/java/jdk-11.0.2" + GRADLE_DIR =".gradle" + } + options { + checkoutToSubdirectory 'marklogic-python-client' + buildDiscarder logRotator(artifactDaysToKeepStr: '7', artifactNumToKeepStr: '', daysToKeepStr: '30', numToKeepStr: '') + } + stages{ + stage('tests'){ + agent {label 'devExpLinuxPool'} + steps{ + script{ + copyRPM 'Latest','11' + setUpML '$WORKSPACE/xdmp/src/Mark*.rpm' + sh label:'deploy project', script: '''#!/bin/bash + export JAVA_HOME=$JAVA_HOME_DIR + export GRADLE_USER_HOME=$WORKSPACE/$GRADLE_DIR + export PATH=$GRADLE_USER_HOME:$JAVA_HOME/bin:$PATH + cd marklogic-python-client/test-app + ./gradlew -i mlDeploy -PmlPassword=admin + ''' + sh label:'Run tests', script: '''#!/bin/bash + cd marklogic-python-client + python -m venv .venv; + source .venv/bin/activate; + pip install poetry; + poetry install; + pytest --junitxml=TestReport.xml || true + ''' + junit 'marklogic-python-client/TestReport.xml' + } + } + } + } +} \ No newline at end of file diff --git a/docs/creating-client.md b/docs/creating-client.md index 35ee844..b8fee41 100644 --- a/docs/creating-client.md +++ b/docs/creating-client.md @@ -11,6 +11,12 @@ The `Client` class is the primary API to interact with in the MarkLogic Python c found in both the `Session` class and the `requests` API. You can therefore use a `Client` object in the same manner as you'd use either the `Session` class or the `requests` API. +## Table of contents +{: .no_toc .text-delta } + +- TOC +{:toc} + ## Creating a client A `Client` instance can be created either by providing a base URL for all requests along with authentication: diff --git a/docs/eval.md b/docs/eval.md index 90eab98..b701baa 100644 --- a/docs/eval.md +++ b/docs/eval.md @@ -9,6 +9,12 @@ execution of custom code, whether via an inline script or an existing module in The MarkLogic Python client supports execution of custom code by simplifying the submission of custom code and converting the multipart response into more useful Python data types. +## Table of contents +{: .no_toc .text-delta } + +- TOC +{:toc} + ## Setup The examples below all depend on the instructions in the [setup guide](example-setup.md) having already been performed. @@ -117,3 +123,17 @@ processing of the response or debugging requests. The `client.eval` and `client.invoke` functions both support referencing a [REST API transaction](https://docs.marklogic.com/REST/client/transaction-management) via the `tx` argument. See [the guide on transactions](transactions.md) for further information. + +## Providing additional arguments + +The `client.eval` and `client.invoke` methods each provide a `**kwargs` argument, so you can pass in any other arguments you would +normally pass to `requests`. For example: + +``` +client.eval(javascript="fn.currentDateTime()", params={"database": "Documents"}) +client.invoke("/sample.sjs", params={"database": "Documents"}) +``` + +Please see [the eval endpoint documentation](https://docs.marklogic.com/REST/POST/v1/eval) +and [the invoke endpoint documentation](https://docs.marklogic.com/REST/POST/v1/invoke) for +information on additional parameters. diff --git a/docs/managing-documents/reading.md b/docs/managing-documents/reading.md index a9e2ee4..428a385 100644 --- a/docs/managing-documents/reading.md +++ b/docs/managing-documents/reading.md @@ -10,6 +10,12 @@ The [GET /v1/documents](https://docs.marklogic.com/REST/GET/v1/documents) endpoi reading multiple documents with metadata via a multipart/mixed HTTP response. The MarkLogic Python client simplifies handling the response by converting it into a list of `Document` instances via the `client.documents.read` method. +## Table of contents +{: .no_toc .text-delta } + +- TOC +{:toc} + ## Setup for examples The examples below all assume that you have created a new MarkLogic user named "python-user" as described in the diff --git a/docs/managing-documents/searching.md b/docs/managing-documents/searching.md index 64d026d..5396970 100644 --- a/docs/managing-documents/searching.md +++ b/docs/managing-documents/searching.md @@ -12,6 +12,12 @@ returning content and metadata for each matching document. Similar to reading mu HTTP response. The MarkLogic Python client simplifies use of this operation by returning a list of `Document` instances via the `client.documents.search` method. +## Table of contents +{: .no_toc .text-delta } + +- TOC +{:toc} + ## Setup for examples The examples below all assume that you have created a new MarkLogic user named "python-user" as described in the diff --git a/docs/managing-documents/writing.md b/docs/managing-documents/writing.md index 10d208f..fb4f8da 100644 --- a/docs/managing-documents/writing.md +++ b/docs/managing-documents/writing.md @@ -11,6 +11,14 @@ writing multiple documents with metadata via a multipart HTTP request. The MarkL simplifies the use of this endpoint via the `client.documents.write` method and the `Document` class. +## Table of contents +{: .no_toc .text-delta } + +- TOC +{:toc} + +## Setup + The examples below all assume that you have created a new MarkLogic user named "python-user" as described in the [setup guide](../example-setup.md). In addition, each of the examples below requires the following `Client` instance to be created first: diff --git a/docs/rows.md b/docs/rows.md index 052ae78..98616b7 100644 --- a/docs/rows.md +++ b/docs/rows.md @@ -9,6 +9,12 @@ The [MarkLogic REST rows service](https://docs.marklogic.com/REST/client/row-man operations for querying for rows via several query languages. The MarkLogic Python client simplifies submitting queries for rows and converting responses into useful data structures. +## Table of contents +{: .no_toc .text-delta } + +- TOC +{:toc} + ## Setup The examples below require documents to be loaded along with a @@ -178,4 +184,18 @@ Printing the `df` object will yield the following: 1 Davis Miles 1926-05-26 2 Armstrong Louis 1901-08-04 3 Coltrane John 1926-09-23 -``` \ No newline at end of file +``` + +## Providing additional arguments + +The `client.rows.query` method provides a `**kwargs` argument, so you can pass in any other arguments you would +normally pass to `requests`. For example: + +``` +response = client.rows.query("op.fromView('example', 'musician')", params={"database": "Documents"}) +``` + +Please see [the rows endpoint documentation](https://docs.marklogic.com/REST/POST/v1/rows) for +information on additional parameters. If you are submitting a GraphQL query, then see +[the GraphQL endpoint documentation](https://docs.marklogic.com/REST/POST/v1/rows/graphql) for +information on parameters for that endpoint. diff --git a/docs/transactions.md b/docs/transactions.md index 585a3e0..335c8df 100644 --- a/docs/transactions.md +++ b/docs/transactions.md @@ -13,6 +13,14 @@ via a `Transaction` class that is also a thereby allowing it to handle committing or rolling back the transaction without any user involvement. +## Table of contents +{: .no_toc .text-delta } + +- TOC +{:toc} + +## Using a transaction + The following example demonstrates writing documents via multiple calls to MarkLogic, all within the same REST API transaction; the example depends on first following the instructions in the [setup guide](example-setup.md): diff --git a/marklogic/documents.py b/marklogic/documents.py index 945f97f..ea6aa71 100644 --- a/marklogic/documents.py +++ b/marklogic/documents.py @@ -1,5 +1,6 @@ import json from collections import OrderedDict +from email.message import Message from typing import Union from marklogic.transactions import Transaction @@ -262,23 +263,29 @@ def _extract_values_from_header(part) -> dict: Returns a dict containing values about the document content or metadata. """ encoding = part.encoding - disposition = part.headers["Content-Disposition".encode(encoding)].decode(encoding) - disposition_values = {} - for item in disposition.split(";"): - tokens = item.split("=") - # The first item will be "attachment" and can be ignored. - if len(tokens) == 2: - disposition_values[tokens[0].strip()] = tokens[1] + disposition = part.headers["Content-Disposition".encode(encoding)].decode( + encoding + ) content_type = None if part.headers.get("Content-Type".encode(encoding)): - content_type = part.headers["Content-Type".encode(encoding)].decode(encoding) + content_type = part.headers["Content-Type".encode(encoding)].decode( + encoding + ) - uri = disposition_values["filename"] - if uri.startswith('"'): - uri = uri[1:] - if uri.endswith('"'): - uri = uri[:-1] + content_disposition_header = part.headers[ + "Content-Disposition".encode(encoding) + ].decode(encoding) + msg = Message() + msg["content-disposition"] = content_disposition_header + uri = msg.get_filename() + + disposition_values = {} + for item in disposition.replace(uri, "").split(";"): + tokens = item.split("=") + key = tokens[0].strip() + if key in ["category", "versionId"]: + disposition_values[key] = tokens[1] return { "uri": uri, diff --git a/marklogic/rows.py b/marklogic/rows.py index 65a74ef..80c8852 100644 --- a/marklogic/rows.py +++ b/marklogic/rows.py @@ -67,6 +67,63 @@ def query( not 2xx, then the entire response is always returned. """ path = "v1/rows/graphql" if graphql else "v1/rows" + return self.__send_request( + path, + dsl, + plan, + sql, + sparql, + graphql, + format, + tx, + return_response, + **kwargs, + ) + + def update( + self, + dsl: str = None, + plan: dict = None, + format: str = "json", + tx: Transaction = None, + return_response: bool = False, + **kwargs, + ): + """ + Sends an update query to an endpoint at the MarkLogic rows service defined at + https://docs.marklogic.com/REST/client/row-management. One of 'dsl' or + 'plan' must be defined. This feature requires the use of MarkLogic version + 11.2 or later. + + For more information about Optic Update and using the Optic DSL, + see https://docs.marklogic.com/guide/app-dev/OpticAPI. + + :param dsl: an Optic DSL query + :param plan: a serialized Optic query + :param tx: optional REST transaction in which to service this request. + :param return_response: boolean specifying if the entire original response + object should be returned (True) or if only the data should be returned (False) + upon a success (2xx) response. Note that if the status code of the response is + not 2xx, then the entire response is always returned. + """ + path = "v1/rows/update" + return self.__send_request( + path, dsl, plan, None, None, None, format, tx, return_response, **kwargs + ) + + def __send_request( + self, + path: str = None, + dsl: str = None, + plan: dict = None, + sql: str = None, + sparql: str = None, + graphql: str = None, + format: str = "json", + tx: Transaction = None, + return_response: bool = False, + **kwargs, + ): headers = kwargs.pop("headers", {}) data = None if graphql: diff --git a/pyproject.toml b/pyproject.toml index 1c5b2d9..1db4b73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "marklogic-python-client" -version = "1.1.0" +version = "1.1.1" description = "Python client for MarkLogic, built on the requests library" authors = ["MarkLogic "] readme = "README.md" diff --git a/test-app/.gitignore b/test-app/.gitignore index 3efdcd5..fb99400 100644 --- a/test-app/.gitignore +++ b/test-app/.gitignore @@ -1,3 +1,4 @@ .gradle gradle-local.properties build +docker \ No newline at end of file diff --git a/test-app/docker-compose.yml b/test-app/docker-compose.yml new file mode 100644 index 0000000..77703a1 --- /dev/null +++ b/test-app/docker-compose.yml @@ -0,0 +1,18 @@ +version: '3.8' +name: marklogic_python + +services: + + marklogic: + image: "marklogicdb/marklogic-db:11.2.0-centos-1.1.2" + platform: linux/amd64 + environment: + - INSTALL_CONVERTERS=true + - MARKLOGIC_INIT=true + - MARKLOGIC_ADMIN_USERNAME=admin + - MARKLOGIC_ADMIN_PASSWORD=admin + volumes: + - ./docker/marklogic/logs:/var/opt/MarkLogic/Logs + ports: + - "8000-8002:8000-8002" + - "8030-8031:8030-8031" diff --git a/test-app/src/main/ml-data/doc2;copy.xml b/test-app/src/main/ml-data/doc2;copy.xml new file mode 100644 index 0000000..b234976 --- /dev/null +++ b/test-app/src/main/ml-data/doc2;copy.xml @@ -0,0 +1 @@ +semicolon \ No newline at end of file diff --git a/test-app/src/main/ml-data/doc2=copy.xml b/test-app/src/main/ml-data/doc2=copy.xml new file mode 100644 index 0000000..db3647b --- /dev/null +++ b/test-app/src/main/ml-data/doc2=copy.xml @@ -0,0 +1 @@ +equal \ No newline at end of file diff --git a/tests/remove-uri-plan.json b/tests/remove-uri-plan.json new file mode 100644 index 0000000..75fee4a --- /dev/null +++ b/tests/remove-uri-plan.json @@ -0,0 +1,31 @@ +{ + "$optic" : { + "ns" : "op", + "fn" : "operators", + "args" : [ { + "ns" : "op", + "fn" : "from-doc-uris", + "args" : [ { + "ns" : "cts", + "fn" : "document-query", + "args" : [ [ { + "ns" : "xs", + "fn" : "string", + "args" : [ "/temp/doc2.json" ] + } ] ] + }, null ] + }, { + "ns" : "op", + "fn" : "remove", + "args" : [ { + "ns" : "op", + "fn" : "col", + "args" : [ { + "ns" : "xs", + "fn" : "string", + "args" : [ "uri" ] + } ] + } ] + } ] + } +} \ No newline at end of file diff --git a/tests/test_cloud.py b/tests/test_cloud.py index a92fd2e..8cc122d 100644 --- a/tests/test_cloud.py +++ b/tests/test_cloud.py @@ -42,13 +42,13 @@ def test_base_url_used_instead_of_host(cloud_config): def test_invalid_host(): with pytest.raises(ValueError) as err: Client( - host="marklogic.com", + host="localhost", + port=8031, + verify=False, cloud_api_key="doesnt-matter-for-this-test", base_path=DEFAULT_BASE_PATH, ) - assert str(err.value).startswith( - "Unable to generate token; status code: 403; cause: " - ) + assert "Unable to generate token; status code: 401; cause: " in str(err.value) def test_invalid_api_key(cloud_config): diff --git a/tests/test_rows_update.py b/tests/test_rows_update.py new file mode 100644 index 0000000..b98d6b4 --- /dev/null +++ b/tests/test_rows_update.py @@ -0,0 +1,74 @@ +import json +from marklogic.documents import Document + + +def test_update_dsl_fromDocDescriptors(client): + doc_uri = "/temp/doc1.json" + doc_contents = {"hello": "doc1"} + doc_permissions = [ + {"capability": "read", "roleName": "python-tester"}, + {"capability": "update", "roleName": "python-tester"}, + ] + update_query_fromDocDescriptors = f""" + const docDescriptors = [ + {{ + uri:"{doc_uri}", + doc:'{json.dumps(doc_contents)}', + permissions: {json.dumps(doc_permissions)} + }} + ]; + op.fromDocDescriptors(docDescriptors).write() + """ + response = client.rows.update(update_query_fromDocDescriptors, return_response=True) + assert 200 == response.status_code + + docs = client.documents.read([doc_uri]) + doc1 = next(doc for doc in docs if doc.uri == doc_uri) + assert "application/json" == doc1.content_type + assert doc1.version_id is not None + assert doc_contents == doc1.content + + +def test_update_dsl_remove(admin_client): + DEFAULT_PERMS = {"python-tester": ["read", "update"]} + DOC_URI = "/temp/doc1.json" + response = admin_client.documents.write( + [Document(DOC_URI, {"doc": 1}, permissions=DEFAULT_PERMS)] + ) + + update_query_remove = 'op.fromDocUris("' + DOC_URI + '").lockForUpdate().remove()' + response = admin_client.rows.update(update_query_remove, return_response=True) + assert 200 == response.status_code + + docs = admin_client.documents.read([DOC_URI]) + assert 0 == len(docs) + + +def test_update_dsl_wrong_path(admin_client): + DEFAULT_PERMS = {"python-tester": ["read", "update"]} + DOC_URI = "/temp/doc1.json" + response = admin_client.documents.write( + [Document(DOC_URI, {"doc": 1}, permissions=DEFAULT_PERMS)] + ) + + update_query_remove = 'op.fromDocUris("' + DOC_URI + '").lockForUpdate().remove()' + response = admin_client.rows.query(update_query_remove, return_response=True) + assert 500 == response.status_code + assert ( + "Optic plans that perform updates must be sent via HTTP POST to the v1/rows/update endpoint." + in response.content.decode("utf-8") + ) + + +def test_update_via_serialized_plan(client): + DEFAULT_PERMS = {"python-tester": ["read", "update"]} + DOC_URI = "/temp/doc2.json" + client.documents.write([Document(DOC_URI, {"doc": 1}, permissions=DEFAULT_PERMS)]) + docs = client.documents.read(DOC_URI) + assert 1 == len(docs) + + plan = open("tests/remove-uri-plan.json", "rb") + response = client.rows.update(plan=plan, return_response=True) + assert 200 == response.status_code + docs = client.documents.read(DOC_URI) + assert 0 == len(docs) diff --git a/tests/test_search_docs.py b/tests/test_search_docs.py index cb8bdee..b549d3e 100644 --- a/tests/test_search_docs.py +++ b/tests/test_search_docs.py @@ -84,10 +84,11 @@ def test_search_with_original_response(client: Client): def test_collection(client: Client): + docs = client.documents.search( categories=["content", "collections"], collections=["search-test"] ) - assert len(docs) == 2 + assert len(docs) == 4 doc1 = next(doc for doc in docs if doc.uri == "/doc1.json") assert doc1.content is not None @@ -101,6 +102,18 @@ def test_collection(client: Client): assert "test-data" in doc1.collections assert "search-test" in doc1.collections + doc3 = next(doc for doc in docs if doc.uri == "/doc2;copy.xml") + assert doc3.content is not None + assert len(doc3.collections) == 2 + assert "test-data" in doc3.collections + assert "search-test" in doc3.collections + + doc4 = next(doc for doc in docs if doc.uri == "/doc2=copy.xml") + assert doc4.content is not None + assert len(doc4.collections) == 2 + assert "test-data" in doc4.collections + assert "search-test" in doc4.collections + def test_not_rest_user(not_rest_user_client: Client): response: Response = not_rest_user_client.documents.search(q="hello") @@ -109,3 +122,27 @@ def test_not_rest_user(not_rest_user_client: Client): ), """The user does not have the rest-reader privilege, so MarkLogic is expected to return a 403. And the documents.search method is then expected to return the Response so that the user has access to everything in it.""" + + +def test_version_id(client: Client): + equalSignEtag = ( + client.get("v1/documents?uri=/doc2=copy.xml") + .headers["ETag"] + .replace('"', "") + ) + + semicolonEtag = ( + client.get("v1/documents?uri=/doc2;copy.xml") + .headers["ETag"] + .replace('"', "") + ) + + docs = client.documents.search( + categories=["content", "collections"], collections=["search-test"] + ) + + doc1 = next(doc for doc in docs if doc.uri == "/doc2=copy.xml") + assert doc1.version_id == equalSignEtag + + doc2 = next(doc for doc in docs if doc.uri == "/doc2;copy.xml") + assert doc2.version_id == semicolonEtag