From 36d289dde43015cb4f6885dfc917a6f13104badb Mon Sep 17 00:00:00 2001 From: "Jalpreet Singh Nanda (:imjalpreet)" Date: Mon, 3 Jun 2024 07:39:00 +0530 Subject: [PATCH] Update Resource Group Documentation and remove erroneous information --- .../sphinx/admin/resource-groups-example1.sql | 50 ---------- .../src/main/sphinx/admin/resource-groups.rst | 99 +++++++++++++------ 2 files changed, 67 insertions(+), 82 deletions(-) delete mode 100644 presto-docs/src/main/sphinx/admin/resource-groups-example1.sql diff --git a/presto-docs/src/main/sphinx/admin/resource-groups-example1.sql b/presto-docs/src/main/sphinx/admin/resource-groups-example1.sql deleted file mode 100644 index 49efc84e97d7c..0000000000000 --- a/presto-docs/src/main/sphinx/admin/resource-groups-example1.sql +++ /dev/null @@ -1,50 +0,0 @@ ---This script first creates a database named presto_resource_groups and then ---creates the resource_groups_global_properties, resource_groups, and selectors tables within that database. ---It then inserts some example data into these tables. - ---Please remember to replace 'user' with the actual username in your environment. --- Also note that this is a simple example and may not cover all your use cases. --- Always refer to the official PrestoDB documentation for the most accurate and up-to-date information. 
-CREATE DATABASE IF NOT EXISTS presto_resource_groups; -USE presto_resource_groups; - -CREATE TABLE IF NOT EXISTS resource_groups_global_properties ( - name VARCHAR(128) NOT NULL PRIMARY KEY, - value VARCHAR(512) NOT NULL, - UNIQUE (name) - ); - -INSERT INTO resource_groups_global_properties (name, value) -VALUES ('cpu_quota_period', '1h'); - -CREATE TABLE IF NOT EXISTS resource_groups ( - id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, - name VARCHAR(128) NOT NULL, - soft_memory_limit VARCHAR(128) NOT NULL, - hard_concurrency_limit INT NOT NULL, - max_queued INT NOT NULL, - jmx_export BOOLEAN NOT NULL, - soft_cpu_limit VARCHAR(128), - hard_cpu_limit VARCHAR(128), - scheduling_policy VARCHAR(128), - scheduling_weight INT, - parent_id BIGINT, - environment VARCHAR(128), - UNIQUE (name, environment) - ); - -INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, jmx_export) -VALUES ('global', '80%', 100, 1000, true); - -CREATE TABLE IF NOT EXISTS selectors ( - id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, - resource_group_id BIGINT NOT NULL, - user_regex VARCHAR(512), - source_regex VARCHAR(512), - query_type VARCHAR(512), - priority INT NOT NULL, - UNIQUE (resource_group_id, priority) - ); - -INSERT INTO selectors (resource_group_id, user_regex, priority) -VALUES (LAST_INSERT_ID(), 'user', 1); \ No newline at end of file diff --git a/presto-docs/src/main/sphinx/admin/resource-groups.rst b/presto-docs/src/main/sphinx/admin/resource-groups.rst index d4c0196e6a6f4..b2693bc224daa 100644 --- a/presto-docs/src/main/sphinx/admin/resource-groups.rst +++ b/presto-docs/src/main/sphinx/admin/resource-groups.rst @@ -119,14 +119,21 @@ To set up a database-based resource group manager: .. 
code-block:: text resource-groups.configuration-manager=db - resource-groups.config-db-url=jdbc:mysql://localhost:3306/resource_groups - resource-groups.config-db-user=username - resource-groups.config-db-password=password + resource-groups.config-db-url=jdbc:mysql://localhost:3306/resource_groups?user=<user>&password=<password> + +Replace ``<user>`` and ``<password>`` with the actual username and password. With the Database Resource Group Manager, changes to the configuration in the database take effect immediately and do not require a restart of the Presto server. This allows for more flexibility and dynamic changes to the resource group configurations. +The resource group configuration must be populated through tables +``resource_groups_global_properties``, ``resource_groups``, and ``selectors``. If any of the tables +do not exist when Presto starts, they are created automatically. + +The rules in the ``selectors`` table are processed in descending order of the values in the +``priority`` field. + Database Resource Group Manager Properties ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -140,19 +147,10 @@ Database Resource Group Manager Properties * - ``resource-groups.config-db-url`` - Database URL to load configuration from. - ``none`` - * - ``resource-groups.config-db-user`` - - Database user to connect with. - - ``none`` - * - ``resource-groups.config-db-password`` - - Password for database user to connect with. - - ``none`` * - ``resource-groups.max-refresh-interval`` - The maximum time period for which the cluster will continue to accept queries after refresh failures, causing configuration to become stale. 
- ``1h`` - * - ``resource-groups.refresh-interval`` - - How often the cluster reloads from the database - - ``1s`` * - ``resource-groups.exact-match-selector-enabled`` - Setting this flag enables usage of an additional ``exact_match_source_selectors`` table to configure resource group @@ -213,6 +211,23 @@ Here are the key properties that can be set for a Resource Group: * ``jmxExport`` (optional): If set to ``true``, the statistics of the resource group will be exported via JMX. Defaults to ``false``. +* ``perQueryLimits`` (optional): specifies max resources that each query in a + resource group may consume before being killed. These limits are not inherited from parent groups. + May set three types of limits: + + - ``executionTimeLimit`` (optional): Specify an absolute value (for example, ``1h``) + for the maximum time a query may take to execute. + + - ``totalMemoryLimit`` (optional): Specify an absolute value (for example, ``1GB``) + for the maximum distributed memory a query may consume. + + - ``cpuTimeLimit`` (optional): Specify an absolute value (for example, ``1h``) + for the maximum CPU time a query may use. + +* ``workerPerQueryLimit`` (optional): specifies the minimum number of workers that have to + be available for each query. Intended to be used in elastic clusters where number of workers + varies over time. + * ``subGroups`` (optional): list of sub-groups. A list of sub-groups within the resource group. Each sub-group can have its own set of properties. @@ -266,6 +281,16 @@ Here are the key components of selector rules in PrestoDB: * ``clientTags`` (optional): List of tags. To match, every tag in this list must be in the list of client-provided tags associated with the query. +* ``selectorResourceEstimate`` (optional): Resource Group Selection based on resource estimates. + - ``executionTime`` + - ``peakMemory`` + - ``cpuTime`` + +* ``clientInfo`` (optional): String to match against client info. 
+ +* ``principal`` (optional): This is a regular expression that matches the principal who is submitting the query. + +* ``schema`` (optional): This matches the session schema of the query. Selectors are processed sequentially and the first one that matches will be used. @@ -368,26 +393,14 @@ File Resource Group Manager Database Resource Group Manager ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This example is for a MySQL database. - -.. literalinclude:: resource-groups-example1.sql - :language: sql - .. code-block:: sql -- global properties + INSERT INTO resource_groups_global_properties (name, value) VALUES ('cpu_quota_period', '1h'); - -- get ID of 'other' group - SELECT resource_group_id FROM resource_groups WHERE name = 'other'; -- 4 - - -- create '${USER}' group with 'other' as parent. - INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, environment, parent) VALUES ('${USER}', '10%', 1, 100, 'test_environment', 4); - - -- create 'bi-${toolname}' group with 'adhoc' as parent - INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, scheduling_weight, scheduling_policy, environment, parent) VALUES ('bi-${toolname}', '10%', 10, 100, 10, 'weighted_fair', 'test_environment', 3); - - -- create 'pipeline' group with 'global' as parent - INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, scheduling_weight, jmx_export, environment, parent) VALUES ('pipeline', '80%', 45, 100, 1, true, 'test_environment', 1); + -- Every row in resource_groups table indicates a resource group. + -- The environment name is 'test_environment'; make sure it matches `node.environment` in your cluster. + -- The parent-child relationship is indicated by the ID in 'parent' column. 
-- create a root group 'global' with NULL parent INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, scheduling_policy, jmx_export, environment) VALUES ('global', '80%', 100, 1000, 'weighted', true, 'test_environment'); @@ -400,21 +413,43 @@ This example is for a MySQL database. -- get ID of 'adhoc' group SELECT resource_group_id FROM resource_groups WHERE name = 'adhoc'; -- 3 - -- create 'other' group with 'adhoc' as parent INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, scheduling_weight, scheduling_policy, environment, parent) VALUES ('other', '10%', 2, 1, 10, 'weighted_fair', 'test_environment', 3); + -- get ID of 'other' group + SELECT resource_group_id FROM resource_groups WHERE name = 'other'; -- 4 + -- create '${USER}' group with 'other' as parent. + INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, environment, parent) VALUES ('${USER}', '10%', 1, 100, 'test_environment', 4); + + -- create 'bi-${toolname}' group with 'adhoc' as parent + INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, scheduling_weight, scheduling_policy, environment, parent) VALUES ('bi-${toolname}', '10%', 10, 100, 10, 'weighted_fair', 'test_environment', 3); + + -- create 'pipeline' group with 'global' as parent + INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, scheduling_weight, jmx_export, environment, parent) VALUES ('pipeline', '80%', 45, 100, 1, true, 'test_environment', 1); + + -- get ID of 'pipeline' group + SELECT resource_group_id FROM resource_groups WHERE name = 'pipeline'; -- 7 + -- create 'pipeline_${USER}' group with 'pipeline' as parent + INSERT INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, environment, parent) VALUES ('pipeline_${USER}', '50%', 5, 100, 'test_environment', 7); + + -- create a root group 'admin' with NULL parent + INSERT 
INTO resource_groups (name, soft_memory_limit, hard_concurrency_limit, max_queued, scheduling_policy, environment, jmx_export) VALUES ('admin', '100%', 50, 100, 'query_priority', 'test_environment', true); + -- Selectors -- use ID of 'admin' resource group for selector INSERT INTO selectors (resource_group_id, user_regex, priority) VALUES ((SELECT resource_group_id FROM resource_groups WHERE name = 'admin'), 'bob', 6); - -- use ID of 'admin' resource group for selector - INSERT INTO selectors (resource_group_id, user_group_regex, priority) VALUES ((SELECT resource_group_id FROM resource_groups WHERE name = 'admin'), 'admin', 5); - -- use ID of 'global.data_definition' resource group for selector INSERT INTO selectors (resource_group_id, source_regex, query_type, priority) VALUES ((SELECT resource_group_id FROM resource_groups WHERE name = 'data_definition'), '.*pipeline.*', 'DATA_DEFINITION', 4); -- use ID of 'global.pipeline.pipeline_${USER}' resource group for selector INSERT INTO selectors (resource_group_id, source_regex, priority) VALUES ((SELECT resource_group_id FROM resource_groups WHERE name = 'pipeline_${USER}'), '.*pipeline.*', 3); + + -- get ID of 'global.adhoc.other.${USER}' resource group by disambiguating group name using parent ID + SELECT A.resource_group_id self_id, B.resource_group_id parent_id, concat(B.name, '.', A.name) name_with_parent + FROM resource_groups A JOIN resource_groups B ON A.parent = B.resource_group_id + WHERE A.name = '${USER}' AND B.name = 'other'; + -- | 5 | 4 | other.${USER} | + INSERT INTO selectors (resource_group_id, priority) VALUES (5, 1);