diff --git a/docs/source/_build/API_REFERENCE_LINKS.yml b/docs/source/_build/API_REFERENCE_LINKS.yml index f86b46d2bf8e..b5feaf67aff1 100644 --- a/docs/source/_build/API_REFERENCE_LINKS.yml +++ b/docs/source/_build/API_REFERENCE_LINKS.yml @@ -181,6 +181,13 @@ python: write_json: https://docs.pola.rs/api/python/stable/reference/api/polars.DataFrame.write_json.html write_ndjson: https://docs.pola.rs/api/python/stable/reference/api/polars.DataFrame.write_ndjson.html write_parquet: https://docs.pola.rs/api/python/stable/reference/api/polars.DataFrame.write_parquet.html + Workspace: https://docs.cloud.pola.rs/reference/workspace/workspace.html + ComputeContext: https://docs.cloud.pola.rs/reference/compute/compute.html + LazyFrameExt : https://docs.cloud.pola.rs/reference/query/lazyframeext.html + QueryResult : https://docs.cloud.pola.rs/reference/query/query_result.html + InteractiveQuery : https://docs.cloud.pola.rs/reference/query/interactive_query.html + BatchQuery: https://docs.cloud.pola.rs/reference/query/batch_query.html + login: https://docs.cloud.pola.rs/reference/auth/api/polars_cloud.login.html rust: agg: https://docs.rs/polars/latest/polars/prelude/struct.LazyGroupBy.html#method.agg diff --git a/docs/source/_build/css/extra.css b/docs/source/_build/css/extra.css index 4f9cd5638a55..0256afdc1926 100644 --- a/docs/source/_build/css/extra.css +++ b/docs/source/_build/css/extra.css @@ -1,7 +1,5 @@ :root { - --md-primary-fg-color: #0B7189; - --md-primary-fg-color--light: #C2CCD6; - --md-primary-fg-color--dark: #103547; + --md-primary-fg-color: #0075ff; --md-text-font: 'Proxima Nova', sans-serif; } diff --git a/docs/source/api/index.md b/docs/source/api/reference.md similarity index 90% rename from docs/source/api/index.md rename to docs/source/api/reference.md index d0b8e51dbd71..199401411f8c 100644 --- a/docs/source/api/index.md +++ b/docs/source/api/reference.md @@ -1,9 +1,4 @@ ---- -hide: - - navigation ---- - -# API reference +# Reference guide The API 
reference contains detailed descriptions of all public functions and objects. It's the best place to look if you need information on a specific function. diff --git a/docs/source/polars-cloud/assets/aws-infra.png b/docs/source/polars-cloud/assets/aws-infra.png new file mode 100644 index 000000000000..e0027e7d940a Binary files /dev/null and b/docs/source/polars-cloud/assets/aws-infra.png differ diff --git a/docs/source/polars-cloud/assets/connect-cloud/cloudformation.png b/docs/source/polars-cloud/assets/connect-cloud/cloudformation.png new file mode 100644 index 000000000000..d2c1fb2debd8 Binary files /dev/null and b/docs/source/polars-cloud/assets/connect-cloud/cloudformation.png differ diff --git a/docs/source/polars-cloud/assets/connect-cloud/dashboard.png b/docs/source/polars-cloud/assets/connect-cloud/dashboard.png new file mode 100644 index 000000000000..7b739c763c40 Binary files /dev/null and b/docs/source/polars-cloud/assets/connect-cloud/dashboard.png differ diff --git a/docs/source/polars-cloud/assets/connect-cloud/progress-page.png b/docs/source/polars-cloud/assets/connect-cloud/progress-page.png new file mode 100644 index 000000000000..85706e14b820 Binary files /dev/null and b/docs/source/polars-cloud/assets/connect-cloud/progress-page.png differ diff --git a/docs/source/polars-cloud/assets/connect-cloud/successful-setup.png b/docs/source/polars-cloud/assets/connect-cloud/successful-setup.png new file mode 100644 index 000000000000..254583e31451 Binary files /dev/null and b/docs/source/polars-cloud/assets/connect-cloud/successful-setup.png differ diff --git a/docs/source/polars-cloud/assets/connect-cloud/workspace-naming.png b/docs/source/polars-cloud/assets/connect-cloud/workspace-naming.png new file mode 100644 index 000000000000..ea0b6d72c672 Binary files /dev/null and b/docs/source/polars-cloud/assets/connect-cloud/workspace-naming.png differ diff --git a/docs/source/polars-cloud/cli.md b/docs/source/polars-cloud/cli.md new file mode 100644 index 
000000000000..6da6e29bb689 --- /dev/null +++ b/docs/source/polars-cloud/cli.md @@ -0,0 +1,55 @@ +# CLI + +Polars Cloud comes with a command line interface (CLI) out of the box. This allows you to interact +with Polars Cloud resources from the terminal. + +```bash +pc --help +``` + +``` +usage: pc [-h] [-v] [-V] {login,workspace,compute} ... + +positional arguments: +{login,workspace,compute} +login Authenticate with Polars Cloud by logging in through the browser +workspace Manage Polars Cloud workspaces. +compute Manage Polars Cloud compute clusters. + +options: +-h, --help show this help message and exit +-v, --verbose Output debug logging messages. +-V, --version Display the version of the Polars Cloud client. +``` + +### Authentication + +You can authenticate with Polars Cloud from the CLI using + +```bash +pc login +``` + +This refreshes your access token and saves it to disk. + +### Workspaces + +Create and set up a new workspace + +```bash +pc workspace setup +``` + +List all workspaces + +```bash +pc workspace list +``` + +``` +NAME ID STATUS +test-workspace 0194ac0e-5122-7a90-af5e-b1f60b1989f4 Active +polars-ci-2025… 0194287a-e0a5-7642-8058-0f79a39f5b98 Uninitialized +``` + +### Compute diff --git a/docs/source/polars-cloud/connect-cloud.md b/docs/source/polars-cloud/connect-cloud.md new file mode 100644 index 000000000000..ad92335cecc2 --- /dev/null +++ b/docs/source/polars-cloud/connect-cloud.md @@ -0,0 +1,46 @@ +# Connect cloud environment + +To use Polars Cloud, you have to connect all your workspaces to a cloud environment. + +If you log in to the Polars Cloud dashboard for the first time, you will notice a blue bar at the top of the screen. When you have created a new account that is not connected to a cloud environment, you can explore Polars Cloud but you cannot execute any queries yet. 
+ +![An overview of the Polars Cloud dashboard showing a button to connect your cloud environment](../assets/connect-cloud/dashboard.png) + +When you click the blue bar you will be redirected to the start of the set up flow. In this first step you can name your workspace. + +## 1. Set workspace name + +In the first step of the setup flow you give a name to your workspace. You can keep the name "Personal Workspace" or use the name of your team/department. This workspace name will be required by the compute context to execute a query remotely. + +!!! tip "Naming your workspace" + If you are not sure, you can use a temporary name. You can change the name of the workspace under the workspace settings at any moment. + +![Connect your cloud screen where you can input a workspace name](../assets/connect-cloud/workspace-naming.png) + +## 2. Deploy to AWS + +When you have entered a name, you can click "Deploy to Amazon". This will open a screen in AWS with a CloudFormation template that is required to install the required roles in your AWS environment. + +![CloudFormation stack image as a step of the setup flow](../assets/connect-cloud/cloudformation.png) + +If you want to learn more about what Polars Cloud installs in your environment, you can read more on [the AWS Infrastructure page](../providers/aws/infra). + +!!! info "No permissions to deploy the stack in AWS" + If you don't have the required permissions to deploy CloudFormation stacks in your AWS environment, you can copy the URL and share it with your operations team or someone with the permissions. With the URL they can deploy the stack for you. + +## 3. Deploying the environment + +After you have clicked "Create stack", the CloudFormation stack will be deployed in your environment. This will take around 5 minutes. You can follow the progress in your AWS environment or in the Polars set up flow. 
+ +![Progress screen in the set up flow](../assets/connect-cloud/progress-page.png) + +When the CloudFormation stack is deployed you will see a confirmation message. + +![Final screen of the set up flow indicating successful deployment](../assets/connect-cloud/successful-setup.png) + +If you click "Start exploring", you will be redirected to the Polars Cloud dashboard. + +You can now run your Polars query remotely in the cloud. Go to the [getting started section](../quickstart) to run your first query in minutes, [learn more about how to run queries remotely](../run/compute-context) or manage your workspace to invite your team. + +!!! info "Only connect a workspace once" + You only have to connect your workspace once. If you invite your team to a workspace that is connected to a cloud environment they can immediately run queries remotely. diff --git a/docs/source/polars-cloud/explain/authentication.md b/docs/source/polars-cloud/explain/authentication.md new file mode 100644 index 000000000000..1bb2fdf734a6 --- /dev/null +++ b/docs/source/polars-cloud/explain/authentication.md @@ -0,0 +1,38 @@ +# Authentication + +Polars Cloud allows authentication through short-lived authentication tokens. There are two ways you +can obtain an access token: + +- command line interface +- python client + +After a successful `login` Polars Cloud stores the token in `{$HOME}/.polars`. You can alter this +path by setting the environment variable `POLARS_CLOUD_ACCESS_TOKEN_PATH`. + +### Command Line Interface (CLI) + +Authenticate with CLI using the following command + +```bash +pc login +``` + +### Python client + +Authenticate with Polars Cloud using + +{{code_block('polars-cloud/authentication','login',['login'])}} + +Both methods redirect you to the browser where you can provide your login credentials and continue +the sign in process. + +## Service accounts + +Both flows described above are for interactive logins where a person is present in the process. 
For +non-interactive workflows such as orchestration tools there are service accounts. These allow you to +log in programmatically. + +To create a service account go to the Polars Cloud dashboard under Settings and service accounts. +Here you can create a new service account for your workspace. To authenticate set the +`POLARS_CLOUD_CLIENT_ID` and `POLARS_CLOUD_CLIENT_SECRET` environment variables. Polars Cloud will +automatically pick these up if there are no access tokens present in the path. diff --git a/docs/source/polars-cloud/faq.md b/docs/source/polars-cloud/faq.md new file mode 100644 index 000000000000..b90964fa009c --- /dev/null +++ b/docs/source/polars-cloud/faq.md @@ -0,0 +1,46 @@ +# FAQ + +On this page you can find answers to some frequently asked questions around Polars Cloud. + +## Who is behind Polars Cloud? + +Polars Cloud is built by the organization behind the open source Polars project. Polars has grown to 10M+ monthly downloads and more than 125M total downloads since the first commit in 2020. We are building Polars Cloud, on top of the open source Polars project, to offer a service that is more aligned with the needs of organizations that use or are looking to use Polars. + +By offering our managed service we get to drive even greater adoption and invest more resources into the open source Polars project, supporting further improvements and long term development. + +## Where does the compute run? + +All compute runs in your own cloud environment. The main reason is that this ensures that your data never leaves your environment and that the compute is always close to your data. + +You can learn more about this setup in [the infrastructure section of the documentation](providers/aws/infra.md). + +## Can you run Polars Cloud on-premise? + +Currently, Polars Cloud is only available to organizations that are on AWS. Support for on-premise infrastructure is on our roadmap and will become available soon. 
+ +## What does Polars Cloud offer me beyond Polars? + +Polars Cloud offers a managed service that enables scalable data processing with the flexibility and expressiveness of the Polars API. It extends the open source Polars project with the following capabilities: + +- Distributed engine to efficiently handle terabyte to petabyte scale workloads through parallel processing across multiple nodes +- Cost-optimized serverless architecture that automatically scales compute resources +- Built-in fault tolerance mechanisms ensuring query completion even during hardware failures or system interruptions +- Comprehensive monitoring and analytics tools providing detailed insights into query performance and resource utilization. + +## What are the main use cases for Polars Cloud? + +Polars Cloud offers both a batch and an interactive mode to users. Batch mode can be used for ETL workloads or one-off large scale analytic jobs. Interactive mode is for users that are looking to do data exploration or larger scale data processing that requires more compute than their own machine can offer. + +## How can Polars Cloud integrate with my workflow? + +One of our key priorities is ensuring that running remote queries feels as native and seamless as running them locally. Every user should be able to scale their queries effortlessly. + +Polars Cloud is completely environment agnostic. This allows you to run your queries from anywhere such as your own machine, Jupyter/Marimo notebooks, Airflow DAGs, AWS Lambda functions, or your servers. By not tying you to a specific platform, Polars Cloud gives you the flexibility to execute your queries wherever it best fits your workflow. + +## What is the pricing model of Polars Cloud? + +Polars Cloud is available at no additional cost in this early stage. You only pay for the resources you use in your own cloud environment. 
We are exploring different usage based pricing models that are geared towards running queries as fast and efficiently as possible. + +## Will the distributed engine be available in open source? + +The distributed engine is only available in Polars Cloud. There are no plans to make it available in the open source project. Polars is focused on single machines, as it makes efficient use of the available resources. Users already report utilizing Polars to process hundreds of gigabytes of data on a single (large) compute instance. The distributed engine is geared towards teams and organizations that are I/O bound or want to scale their Polars queries beyond single machines and require a solution to process workloads at terabyte and even petabyte scale. diff --git a/docs/source/polars-cloud/glossary.md b/docs/source/polars-cloud/glossary.md new file mode 100644 index 000000000000..f49342523ba0 --- /dev/null +++ b/docs/source/polars-cloud/glossary.md @@ -0,0 +1,114 @@ +# Concepts + + + +This section covers the main concepts present in Polars Cloud. + +## Workspaces + +A workspace is a logical container in which other resources within Polars Cloud live. All resources +(e.g. compute, queries, ...) are bound to a specific workspace. + +!!! Access Control Everyone within a workspace has the same access; there is no notion of roles (e.g. +admin, user) in the current version. This means users within the same workspace can view each other's +clusters and queries. A workspace has a single IAM role in the cloud and runs under the same +permissions. A user can't send queries to a compute cluster of another user. + +{{code_block('polars-cloud/concepts','workspace',['Workspace'])}} + +## Compute Context + +The compute context describes the underlying hardware. You can start a compute context by specifying +the instance requirements in terms of cpu's and memory or by directly specifying the AWS EC2 +instance type. 
+ +By instance type + +{{code_block('polars-cloud/concepts','compute',['ComputeContext'])}} + +By instance requirements {{code_block('polars-cloud/concepts','compute2',['ComputeContext'])}} + +When specifying with instance requirements Polars will search for the cheapest available instance +type with at least the requested values. In the cloud not all options are available. The following +example `pc.ComputeContext(cpus = 1, memory = 32)` there is no machine with 1 core that has 32 GB. +In this case Polars Cloud will find the cheapest available machine that has at least 32 GB of RAM. + +Below are the various options which you can specify + +| Parameter | Type | Description | +| -------------------------------- | ------------ | ------------------------------------------------------------------------------------------------------- | +| `workspace_name` | string | The name of the workspace | +| `cpus` | number | The minimum number of CPUs the compute cluster should have access to. | +| `memory` | number | The minimum amount of RAM (in GB) the compute cluster should have access to. | +| `instance_type` | string | The AWS instance type (e.g. `t2.micro`). This parameter can not be used together with memory or cpus | +| `storage` | number | The amount of local disk space (in GB) each node in the compute cluster has access to. Defaults to `16` | +| `cluster_size` | number | The number of machines to spin up in the cluster. Defaults to `1`. | +| `interactive` | bool | Activate interactive mode | +| `labels` | List[string] | Labels of the compute context | +| `log_level` | string | Override the log level of the cluster for debug purposes. One of `"info", "debug", "trace"`. | + +!!! warning "Distributed Engine" We are currently developing our distributed engine. This engine +will run on top of the new open source streaming engine and is exclusive to Polars Cloud. It is +still in an experimental phase. 
+ +### Interactive vs Batch + +A compute context can either run in interactive or batch mode. + +The batch mode is meant for queries that are run periodically. In this mode, clients send their +queries to Polars Cloud to be queued. If ready, the compute context will poll and run each query. +Metadata around the query (e.g. status, query plan, logs, metrics) is sent back to Polars Cloud for +reporting purposes. The actual result data is not shared for privacy reasons. + +This process of queuing and polling leads to a delay of a few seconds. Although small, when running a +lot of queries interactively this can lower the developer experience. Additionally, for exploratory +work it is not always necessary or valuable to save the metadata of the query. In these cases the +interactive mode is better. In interactive mode you as a client directly communicate with the +compute cluster. This way there is no delay and queries run immediately. An additional option is +that data can be shared securely, for example to view (a part of) the result. + +### Default Context + +It is recommended to explicitly specify the compute context when running a query. However to +simplify matters it is possible to use a default context. Under `Settings` in your workspace you'll +find the option to specify default parameters for `memory`, `cpus`, `cluster_size` etc. If you run a +query without a context Polars Cloud will spin up a compute cluster with these default parameters. + +## Queries + +Queries represent a single `LazyFrame` being executed in Polars Cloud. This can either be in Batch +or interactive mode. + +{{code_block('polars-cloud/concepts','query',[])}} + +Running a query remotely is as simple as calling `remote` while passing the compute context to it. +Depending on the mode of Compute this either returns an `InteractiveQuery` or a `BatchQuery`. + +### Interactive + +In interactive mode you can directly communicate with the compute context. 
The communication is +securely encrypted using mTLS between your client and the compute server. Queries sent to an +interactive compute context return an `InteractiveQuery` which can be awaited or cancelled. Queries +executed in interactive mode do not show up on the Polars Cloud dashboard. + +{{code_block('polars-cloud/concepts','interactive',['QueryResult','InteractiveQuery'])}} + +In this example we create a `LazyFrame` called `lf` and we execute it on Polars Cloud. We can +continue on the result by calling `lazy()` on the result which leads to a `LazyFrame`. + + + +!!! info "Interactive mode" +If you want to continue on an existing query / query result you must use +`write_parquet` to S3 as an intermediate storage location. We are adding a `.execute` (or similar) +to our API which allows you to skip specifying this location. + + + +### Batch + +Running a query in batch mode gives a `BatchQuery` which has the same API as its interactive +counterpart. The main differences are that queries go through the control plane and metadata on the +query is stored in the dashboard for reporting purposes. + +{{code_block('polars-cloud/concepts','interactive',['QueryResult','BatchQuery'])}} diff --git a/docs/source/polars-cloud/index.md b/docs/source/polars-cloud/index.md new file mode 100644 index 000000000000..b377e8cbfc20 --- /dev/null +++ b/docs/source/polars-cloud/index.md @@ -0,0 +1,3 @@ +# Introduction + +what is polars cloud diff --git a/docs/source/polars-cloud/integrations/airflow.md b/docs/source/polars-cloud/integrations/airflow.md new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/docs/source/polars-cloud/providers/aws/infra.md b/docs/source/polars-cloud/providers/aws/infra.md new file mode 100644 index 000000000000..9cee68265b99 --- /dev/null +++ b/docs/source/polars-cloud/providers/aws/infra.md @@ -0,0 +1,69 @@ +# Infrastructure + +Polars Cloud manages the hardware for you by spinning up and down raw EC2 instances. 
In order to do +this it needs permissions in your own cloud environment. None of the resources below have costs +associated with them. While no compute clusters are running Polars Cloud will not create any AWS +costs. The recommended way of doing this is running `pc workspace setup`. + +## Recommended setup + +When you deploy Polars Cloud the following infrastructure is set up. + +
+![AWS infrastructure](../../assets/aws-infra.png) +
+ +1. A `VPC` and `subnet` in which Polars EC2 workers can run. +1. Two `security groups`. One for batch mode which does not have any public ports and one for + interactive mode which allows direct communication between your local environment and the + cluster. +1. `PolarsWorker` IAM role. Polars EC2 workers run under this IAM role. +1. `UserInitiated` & `Unattended` IAM roles. The `UserInitiated` role has the permissions to start + Polars EC2 workers in your environment. The `Unattended` role can terminate unused compute + clusters that you might have forgotten about. + +## Security + +By design Polars Cloud never has access to the data inside your cloud environment. The data never +leaves your environment. + +### IAM permissions + +The list below shows an overview of the required permissions for each of the roles. + +??? User Initiated + + - ec2:CreateTags + - ec2:RunInstances + - ec2:DescribeInstances + - ec2:DescribeInstanceTypeOfferings + - ec2:DescribeInstanceTypes + - ec2:TerminateInstances + - ec2:CreateFleet + - ec2:CreateLaunchTemplate + - ec2:CreateLaunchTemplateVersion + - ec2:DescribeLaunchTemplates + +??? Unattended + + - ec2:DescribeInstances + - ec2:TerminateInstances + - ec2:DescribeFleets + - ec2:DeleteLaunchTemplate + - ec2:DeleteLaunchTemplateVersions + - ec2:DeleteFleets + - sts:GetCallerIdentity + - sts:TagSession + - cloudwatch:GetMetricData + - logs:GetLogEvents + - logs:FilterLogEvents + - logs:DescribeLogStreams + +??? Worker - logs:CreateLogGroup - logs:PutRetentionPolicy - cloudwatch:PutMetricData + +## Custom setup + +Depending on your enterprise needs or existing infrastructure you may not require certain components +(e.g. VPC, subnet) of the default setup of Polars Cloud or have additional security requirements in +place. Together with our team of engineers we can integrate Polars Cloud with your existing +infrastructure. Please contact us directly. 
diff --git a/docs/source/polars-cloud/providers/aws/permissions.md b/docs/source/polars-cloud/providers/aws/permissions.md new file mode 100644 index 000000000000..c480468a4631 --- /dev/null +++ b/docs/source/polars-cloud/providers/aws/permissions.md @@ -0,0 +1,16 @@ +# Permissions + +The workspace is an isolation for all resources living within your cloud environment. Every +workspace has a single instance profile which defines the permissions for the compute. This profile +is attached to the compute within your environment. By default, the profile can read and write from +S3, but you can easily adjust depending on your own infrastructure stack. + +## Adding or removing permissions + +If you want Polars Cloud to be able to read from other data sources than `S3` within your cloud +environment you must provide the access control from directly within AWS. To do this go to `IAM` +within the aws console and locate the role called `polars--IAMWorkerRole-`. +Here you can adjust the permissions of the workspace for instance: + +- [Narrow down the S3 access to certain buckets](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_deny-except-bucket.html) +- [Provide IAM access to rds database](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.IAMDBAuth.IAMPolicy.html) diff --git a/docs/source/polars-cloud/quickstart.md b/docs/source/polars-cloud/quickstart.md new file mode 100644 index 000000000000..2b3f9bc021aa --- /dev/null +++ b/docs/source/polars-cloud/quickstart.md @@ -0,0 +1,74 @@ +# Getting started + + + +!!! tip "Polars Cloud alpha launch" +Polars cloud has recently been made available to a select group of individuals / companies for early alpha testing. You can read more about Polars Cloud and its ambitious goals in our blog post. + + + +Polars Cloud is a managed compute platform for your Polars queries. 
It allows you to effortlessly +run your local queries in your cloud environment, both in an interactive setting as well as for ETL +or batch jobs. By working in a 'Bring your own Cloud' model the data never leaves your environment. + +## Installation + +Install the Polars Cloud python library in your environment + +```bash +pip install polars polars-cloud +``` + +Create an account and login by running the command below. + +```bash +pc login +``` + +## Connect your cloud + +Polars Cloud currently exclusively supports AWS as a cloud provider. + +Polars Cloud needs permission to spin up & down hardware in your environment. This is done by +deploying our cloudformation template. See our [infrastructure](providers/aws/infra.md) section for +more details. + +To connect your cloud run: + +```bash +pc setup workspace -n +``` + +This redirects you to the browser where you can connect Polars to your AWS environment. +Alternatively, you can follow the steps in the browser and create the workspace there. + +## Run your queries + +Now that we are done with the setup, we can start running queries. The general principle here is +writing Polars like you're always used to and calling `remote` on your `LazyFrame`. The following +example shows how to create a compute cluster and run a simple Polars query. + +{{code_block('polars-cloud/quickstart','general',['ComputeContext','LazyFrameExt'])}} + +Let us go through the code line by line. First we need to define the hardware the cluster will run +on. This can be in terms of cpu & memory or the exact instance type in AWS. + +```python +ctx = pc.ComputeContext(memory = 8, cpus = 2 , cluster_size = 1) +``` + +Then we write a regular lazy Polars query. In this simple example we compute the maximum of column +`a` over column `b`. + +```python +df = pl.DataFrame({ + "a": [1, 2, 3], + "b": [4, 4, 5] +}) +lf = df.lazy().with_columns(pl.col("a").max().over("b").alias("c")) +``` + +Then we are going to run our query on the compute cluster. 
We use `remote` to signify that we want +to run the query remotely. This gives back a special version of the `LazyFrame` with extension +methods. Up until this point nothing has executed yet; calling `write_parquet` sends the query to +the compute context and writes the result to S3. diff --git a/docs/source/polars-cloud/run/compute-context.md b/docs/source/polars-cloud/run/compute-context.md new file mode 100644 index 000000000000..c98676c8f291 --- /dev/null +++ b/docs/source/polars-cloud/run/compute-context.md @@ -0,0 +1,5 @@ +# Set compute context + +explain the compute context in more detail + +include examples diff --git a/docs/source/polars-cloud/run/distributed-engine.md b/docs/source/polars-cloud/run/distributed-engine.md new file mode 100644 index 000000000000..3b572d0fef7e --- /dev/null +++ b/docs/source/polars-cloud/run/distributed-engine.md @@ -0,0 +1,3 @@ +# Distributed execution + +More information about the distributed engine. What is supported at the moment and how to use it? 
diff --git a/docs/source/polars-cloud/run/interactive-batch.md b/docs/source/polars-cloud/run/interactive-batch.md new file mode 100644 index 000000000000..ca741deb7726 --- /dev/null +++ b/docs/source/polars-cloud/run/interactive-batch.md @@ -0,0 +1,3 @@ +# Execute in interactive or batch mode + +explain difference and give examples how to use diff --git a/docs/source/src/python/polars-cloud/authentication.py b/docs/source/src/python/polars-cloud/authentication.py new file mode 100644 index 000000000000..3c0c02dc0aa8 --- /dev/null +++ b/docs/source/src/python/polars-cloud/authentication.py @@ -0,0 +1,4 @@ +# --8<-- [start:login] +import polars_cloud as pc +workspace = pc.login() +# --8<-- [end:login] diff --git a/docs/source/src/python/polars-cloud/concepts.py b/docs/source/src/python/polars-cloud/concepts.py new file mode 100644 index 000000000000..2a0a6a67eb9d --- /dev/null +++ b/docs/source/src/python/polars-cloud/concepts.py @@ -0,0 +1,27 @@ +# ruff: noqa +# --8<-- [start:workspace] +import polars_cloud as pc +workspace = pc.Workspace(name="my-workspace") +# --8<-- [end:workspace] + +# --8<-- [start:compute] +ctx = pc.ComputeContext(instance_type = "t2.large") +# --8<-- [end:compute] + +# --8<-- [start:compute2] +ctx = pc.ComputeContext(cpus = 2, memory = 8) +# --8<-- [end:compute2] + +# --8<-- [start:query] +ctx = pc.ComputeContext(instance_type = "t2.large") +lf = .... # A LazyFrame +lf.remote(context=ctx).write_parquet(...) +# --8<-- [end:query] + +# --8<-- [start:interactive] +lf = .... # A LazyFrame +ctx = pc.ComputeContext(instance_type = "t2.large", interactive=True) +query = lf.remote(context=ctx).write_parquet() +result = query.await_result() +lf2 = result.lazy().with_columns(...) 
+# --8<-- [end:interactive] diff --git a/docs/source/src/python/polars-cloud/quickstart.py b/docs/source/src/python/polars-cloud/quickstart.py new file mode 100644 index 000000000000..d5f8536a7737 --- /dev/null +++ b/docs/source/src/python/polars-cloud/quickstart.py @@ -0,0 +1,13 @@ +# ruff: noqa +# --8<-- [start:general] +import polars_cloud as pc +import polars as pl + +ctx = pc.ComputeContext(memory=8, cpus=2, cluster_size=1) +df = pl.DataFrame({ + "a": [1, 2, 3], + "b": [4, 4, 5] +}) +lf = df.lazy().with_columns(pl.col("a").max().over("b").alias("c")) +lf.remote(context = ctx).write_parquet(uri="s3://my-bucket/result.parquet") +# --8<-- [end:general] diff --git a/mkdocs.yml b/mkdocs.yml index eb5ff399bf94..1119c410ba13 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,188 +10,211 @@ docs_dir: docs/source # Documentation layout nav: - - User guide: - - index.md - - user-guide/getting-started.md - - user-guide/installation.md - - Concepts: - - user-guide/concepts/index.md - - user-guide/concepts/data-types-and-structures.md - - user-guide/concepts/expressions-and-contexts.md - - user-guide/concepts/lazy-api.md - - Expressions: - - user-guide/expressions/index.md - - user-guide/expressions/basic-operations.md - - user-guide/expressions/expression-expansion.md - - user-guide/expressions/casting.md - - user-guide/expressions/strings.md - - user-guide/expressions/lists-and-arrays.md - - user-guide/expressions/categorical-data-and-enums.md - - user-guide/expressions/structs.md - - user-guide/expressions/missing-data.md - - user-guide/expressions/aggregation.md - - user-guide/expressions/window-functions.md - - user-guide/expressions/folds.md - - user-guide/expressions/user-defined-python-functions.md - - user-guide/expressions/numpy-functions.md - - Transformations: - - user-guide/transformations/index.md - - user-guide/transformations/joins.md - - user-guide/transformations/concatenation.md - - user-guide/transformations/pivot.md - - 
user-guide/transformations/unpivot.md - - Time series: - - user-guide/transformations/time-series/parsing.md - - user-guide/transformations/time-series/filter.md - - user-guide/transformations/time-series/rolling.md - - user-guide/transformations/time-series/resampling.md - - user-guide/transformations/time-series/timezones.md - - Lazy API: - - user-guide/lazy/index.md - - user-guide/lazy/using.md - - user-guide/lazy/optimizations.md - - user-guide/lazy/schemas.md - - user-guide/lazy/query-plan.md - - user-guide/lazy/execution.md - - user-guide/lazy/streaming.md - - user-guide/lazy/gpu.md - - IO: - - user-guide/io/index.md - - user-guide/io/csv.md - - user-guide/io/excel.md - - user-guide/io/parquet.md - - user-guide/io/json.md - - user-guide/io/multiple.md - - user-guide/io/hive.md - - user-guide/io/database.md - - user-guide/io/cloud-storage.md - - user-guide/io/bigquery.md - - user-guide/io/hugging-face.md - - Plugins: - - user-guide/plugins/index.md - - user-guide/plugins/expr_plugins.md - - user-guide/plugins/io_plugins.md - - SQL: - - user-guide/sql/intro.md - - user-guide/sql/show.md - - user-guide/sql/select.md - - user-guide/sql/create.md - - user-guide/sql/cte.md - - Migrating: - - user-guide/migration/pandas.md - - user-guide/migration/spark.md - - user-guide/ecosystem.md - - Misc: - - user-guide/misc/multiprocessing.md - - user-guide/misc/visualization.md - - user-guide/misc/styling.md - - user-guide/misc/comparison.md - - user-guide/misc/arrow.md - - user-guide/gpu-support.md - - - API reference: api/index.md - - - Development: - - Contributing: - - development/contributing/index.md - - development/contributing/ide.md - - development/contributing/test.md - - development/contributing/ci.md - - development/contributing/code-style.md - - development/versioning.md - - - Releases: - - releases/changelog.md - - Upgrade guides: - - releases/upgrade/index.md - - releases/upgrade/1.md - - releases/upgrade/0.20.md - - releases/upgrade/0.19.md - + - Polars: + - 
User guide: + - index.md + - user-guide/getting-started.md + - user-guide/installation.md + - Concepts: + - user-guide/concepts/index.md + - user-guide/concepts/data-types-and-structures.md + - user-guide/concepts/expressions-and-contexts.md + - user-guide/concepts/lazy-api.md + - Expressions: + - user-guide/expressions/index.md + - user-guide/expressions/basic-operations.md + - user-guide/expressions/expression-expansion.md + - user-guide/expressions/casting.md + - user-guide/expressions/strings.md + - user-guide/expressions/lists-and-arrays.md + - user-guide/expressions/categorical-data-and-enums.md + - user-guide/expressions/structs.md + - user-guide/expressions/missing-data.md + - user-guide/expressions/aggregation.md + - user-guide/expressions/window-functions.md + - user-guide/expressions/folds.md + - user-guide/expressions/user-defined-python-functions.md + - user-guide/expressions/numpy-functions.md + - Transformations: + - user-guide/transformations/index.md + - user-guide/transformations/joins.md + - user-guide/transformations/concatenation.md + - user-guide/transformations/pivot.md + - user-guide/transformations/unpivot.md + - Time series: + - user-guide/transformations/time-series/parsing.md + - user-guide/transformations/time-series/filter.md + - user-guide/transformations/time-series/rolling.md + - user-guide/transformations/time-series/resampling.md + - user-guide/transformations/time-series/timezones.md + - Lazy API: + - user-guide/lazy/index.md + - user-guide/lazy/using.md + - user-guide/lazy/optimizations.md + - user-guide/lazy/schemas.md + - user-guide/lazy/query-plan.md + - user-guide/lazy/execution.md + - user-guide/lazy/streaming.md + - user-guide/lazy/gpu.md + - IO: + - user-guide/io/index.md + - user-guide/io/csv.md + - user-guide/io/excel.md + - user-guide/io/parquet.md + - user-guide/io/json.md + - user-guide/io/multiple.md + - user-guide/io/hive.md + - user-guide/io/database.md + - user-guide/io/cloud-storage.md + - 
user-guide/io/bigquery.md + - user-guide/io/hugging-face.md + - Plugins: + - user-guide/plugins/index.md + - user-guide/plugins/expr_plugins.md + - user-guide/plugins/io_plugins.md + - SQL: + - user-guide/sql/intro.md + - user-guide/sql/show.md + - user-guide/sql/select.md + - user-guide/sql/create.md + - user-guide/sql/cte.md + - Migrating: + - user-guide/migration/pandas.md + - user-guide/migration/spark.md + - user-guide/ecosystem.md + - Misc: + - user-guide/misc/multiprocessing.md + - user-guide/misc/visualization.md + - user-guide/misc/styling.md + - user-guide/misc/comparison.md + - user-guide/misc/arrow.md + - user-guide/gpu-support.md + - API: + - api/reference.md + - Development: + - Contributing: + - development/contributing/index.md + - development/contributing/ide.md + - development/contributing/test.md + - development/contributing/ci.md + - development/contributing/code-style.md + - development/versioning.md + - Releases: + - releases/changelog.md + - Upgrade guides: + - releases/upgrade/index.md + - releases/upgrade/1.md + - releases/upgrade/0.20.md + - releases/upgrade/0.19.md + - Polars Cloud: + - polars-cloud/index.md + - polars-cloud/quickstart.md + - polars-cloud/connect-cloud.md + - Run queries remotely: + - polars-cloud/run/compute-context.md + - polars-cloud/run/interactive-batch.md + - polars-cloud/run/distributed-engine.md + - User Guide: + - polars-cloud/explain/authentication.md + - Providers: + - AWS: + - polars-cloud/providers/aws/infra.md + - polars-cloud/providers/aws/permissions.md + - Integrations: + - polars-cloud/integrations/airflow.md + - Misc: + - polars-cloud/glossary.md + - polars-cloud/cli.md + - polars-cloud/faq.md + - API Reference: https://docs.cloud.pola.rs not_in_nav: | - /_build/ + /_build/ validation: - links: - # Allow an absolute link to the features page for our code snippets - absolute_links: ignore + links: + # Allow an absolute link to the features page for our code snippets + absolute_links: ignore # Build
directories theme: - name: material - locale: en - custom_dir: docs/source/_build/overrides - palette: - # Palette toggle for light mode - - media: "(prefers-color-scheme: light)" - scheme: default - toggle: - icon: material/brightness-7 - name: Switch to dark mode - # Palette toggle for dark mode - - media: "(prefers-color-scheme: dark)" - scheme: slate - toggle: - icon: material/brightness-4 - name: Switch to light mode - logo: _build/assets/logo.png - features: - - navigation.tracking - - navigation.instant - - navigation.tabs - - navigation.tabs.sticky - - navigation.footer - - navigation.indexes - - content.tabs.link - - content.code.copy - icon: - repo: fontawesome/brands/github + name: material + locale: en + custom_dir: docs/source/_build/overrides + palette: + # Palette toggle for light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: custom + toggle: + icon: material/brightness-7 + name: Switch to dark mode + # Palette toggle for dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: custom + toggle: + icon: material/brightness-4 + name: Switch to light mode + logo: _build/assets/logo.png + features: + - navigation.tracking + - navigation.sections + - navigation.instant + - navigation.tabs + - navigation.tabs.sticky + - navigation.footer + - navigation.indexes + - content.tabs.link + - content.code.copy + icon: + repo: fontawesome/brands/github extra_javascript: - - _build/js/mathjax.js - - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js + - _build/js/mathjax.js + - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js extra_css: - - _build/css/extra.css + - _build/css/extra.css extra: - analytics: - provider: plausible - domain: guide.pola.rs,combined.pola.rs + analytics: + provider: plausible + domain: guide.pola.rs,combined.pola.rs # Preview controls strict: true # Formatting options markdown_extensions: - - admonition - - pymdownx.details - - attr_list - - pymdownx.emoji: - emoji_index: 
!!python/name:material.extensions.emoji.twemoji - emoji_generator: !!python/name:material.extensions.emoji.to_svg - - pymdownx.superfences - - pymdownx.tabbed: - alternate_style: true - - pymdownx.snippets: - base_path: ['.','docs/source/src/'] - check_paths: true - dedent_subsections: true - - footnotes - - pymdownx.arithmatex: - generic: true + - admonition + - md_in_html + - pymdownx.details + - attr_list + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - pymdownx.snippets: + base_path: [".", "docs/source/src/"] + check_paths: true + dedent_subsections: true + - footnotes + - pymdownx.arithmatex: + generic: true hooks: - - docs/source/_build/scripts/people.py + - docs/source/_build/scripts/people.py plugins: - - search: - lang: en - - markdown-exec - - material-plausible - - macros: - module_name: docs/source/_build/scripts/macro - - redirects: - redirect_maps: - 'user-guide/index.md': 'index.md' - 'user-guide/basics/index.md': 'user-guide/getting-started.md' - 'user-guide/basics/reading-writing.md': 'user-guide/getting-started.md' - 'user-guide/basics/expressions.md': 'user-guide/getting-started.md' - 'user-guide/basics/joins.md': 'user-guide/getting-started.md' + - search: + lang: en + - markdown-exec + - material-plausible + - macros: + module_name: docs/source/_build/scripts/macro + - redirects: + redirect_maps: + "user-guide/index.md": "index.md" + "user-guide/basics/index.md": "user-guide/getting-started.md" + "user-guide/basics/reading-writing.md": "user-guide/getting-started.md" + "user-guide/basics/expressions.md": "user-guide/getting-started.md" + "user-guide/basics/joins.md": "user-guide/getting-started.md"