|
| 1 | +--- |
| 2 | +title: Querying ORC Files in Stage |
| 3 | +sidebar_label: Querying ORC File |
| 4 | +--- |
| 5 | +import StepsWrap from '@site/src/components/StepsWrap'; |
| 6 | +import StepContent from '@site/src/components/Steps/step-content'; |
| 7 | + |
| 8 | +## Syntax |
| 9 | + |
| 10 | +```sql |
| 11 | +SELECT [<alias>.]<column> [, <column> ...] | [<alias>.]$<col_position> [, $<col_position> ...] |
| 12 | +FROM {@<stage_name>[/<path>] [<table_alias>] | '<uri>' [<table_alias>]} |
| 13 | +[( |
| 14 | + [<connection_parameters>], |
| 15 | + [ PATTERN => '<regex_pattern>'], |
| 16 | + [ FILE_FORMAT => 'ORC | <custom_format_name>'], |
| 17 | + [ FILES => ( '<file_name>' [ , '<file_name>' ] [ , ... ] ) ] |
| 18 | +)] |
| 19 | +``` |
| 20 | + |
| 21 | +## Tutorial |
| 22 | + |
| 23 | +In this tutorial, we will walk you through the process of downloading the Iris dataset in ORC format, uploading it to an Amazon S3 bucket, creating an external stage, and querying the data directly from the ORC file. |
| 24 | + |
| 25 | +<StepsWrap> |
| 26 | +<StepContent number="1"> |
| 27 | + |
| 28 | +### Download Iris Dataset |
| 29 | + |
| 30 | +Download the Iris dataset from https://github.com/tensorflow/io/raw/master/tests/test_orc/iris.orc, then upload it to your Amazon S3 bucket. |
| 31 | + |
| 32 | +The Iris dataset contains 3 classes of 50 instances each, where each class refers to a type of iris plant. Each instance has 4 attributes: (1) sepal length, (2) sepal width, (3) petal length, and (4) petal width. The last column contains the class label. |
| 33 | + |
| 34 | +</StepContent> |
| 35 | +<StepContent number="2"> |
| 36 | + |
| 37 | +### Create External Stage |
| 38 | + |
| 39 | +Create an external stage that points to the Amazon S3 bucket where your Iris dataset file is stored. |
| 40 | + |
| 41 | +```sql |
| 42 | +CREATE STAGE orc_query_stage |
| 43 | + URL = 's3://databend-doc' |
| 44 | + CONNECTION = ( |
| 45 | + AWS_KEY_ID = '<your-key-id>', |
| 46 | + AWS_SECRET_KEY = '<your-secret-key>' |
| 47 | + ); |
| 48 | +``` |
| 49 | + |
| 50 | +</StepContent> |
| 51 | +<StepContent number="3"> |
| 52 | + |
| 53 | +### Query ORC File |
| 54 | + |
| 55 | +```sql |
| 56 | +SELECT * |
| 57 | +FROM @orc_query_stage |
| 58 | +( |
| 59 | + FILE_FORMAT => 'orc', |
| 60 | + PATTERN => '.*[.]orc' |
| 61 | +); |
| 62 | + |
| 63 | +┌──────────────────────────────────────────────────────────────────────────────────────────────────┐ |
| 64 | +│ sepal_length │ sepal_width │ petal_length │ petal_width │ species │ |
| 65 | +├───────────────────┼───────────────────┼───────────────────┼───────────────────┼──────────────────┤ |
| 66 | +│ 5.1 │ 3.5 │ 1.4 │ 0.2 │ setosa │ |
| 67 | +│ 4.9 │ 3 │ 1.4 │ 0.2 │ setosa │ |
| 68 | +│ 4.7 │ 3.2 │ 1.3 │ 0.2 │ setosa │ |
| 69 | +│ 4.6 │ 3.1 │ 1.5 │ 0.2 │ setosa │ |
| 70 | +│ 5 │ 3.6 │ 1.4 │ 0.2 │ setosa │ |
| 71 | +│ 5.4 │ 3.9 │ 1.7 │ 0.4 │ setosa │ |
| 72 | +│ 4.6 │ 3.4 │ 1.4 │ 0.3 │ setosa │ |
| 73 | +│ 5 │ 3.4 │ 1.5 │ 0.2 │ setosa │ |
| 74 | +│ 4.4 │ 2.9 │ 1.4 │ 0.2 │ setosa │ |
| 75 | +│ 4.9 │ 3.1 │ 1.5 │ 0.1 │ setosa │ |
| 76 | +│ 5.4 │ 3.7 │ 1.5 │ 0.2 │ setosa │ |
| 77 | +│ 4.8 │ 3.4 │ 1.6 │ 0.2 │ setosa │ |
| 78 | +│ 4.8 │ 3 │ 1.4 │ 0.1 │ setosa │ |
| 79 | +│ 4.3 │ 3 │ 1.1 │ 0.1 │ setosa │ |
| 80 | +│ 5.8 │ 4 │ 1.2 │ 0.2 │ setosa │ |
| 81 | +│ 5.7 │ 4.4 │ 1.5 │ 0.4 │ setosa │ |
| 82 | +│ 5.4 │ 3.9 │ 1.3 │ 0.4 │ setosa │ |
| 83 | +│ 5.1 │ 3.5 │ 1.4 │ 0.3 │ setosa │ |
| 84 | +│ 5.7 │ 3.8 │ 1.7 │ 0.3 │ setosa │ |
| 85 | +│ 5.1 │ 3.8 │ 1.5 │ 0.3 │ setosa │ |
| 86 | +│ · │ · │ · │ · │ · │ |
| 87 | +│ · │ · │ · │ · │ · │ |
| 88 | +│ · │ · │ · │ · │ · │ |
| 89 | +│ 7.4 │ 2.8 │ 6.1 │ 1.9 │ virginica │ |
| 90 | +│ 7.9 │ 3.8 │ 6.4 │ 2 │ virginica │ |
| 91 | +│ 6.4 │ 2.8 │ 5.6 │ 2.2 │ virginica │ |
| 92 | +│ 6.3 │ 2.8 │ 5.1 │ 1.5 │ virginica │ |
| 93 | +│ 6.1 │ 2.6 │ 5.6 │ 1.4 │ virginica │ |
| 94 | +│ 7.7 │ 3 │ 6.1 │ 2.3 │ virginica │ |
| 95 | +│ 6.3 │ 3.4 │ 5.6 │ 2.4 │ virginica │ |
| 96 | +│ 6.4 │ 3.1 │ 5.5 │ 1.8 │ virginica │ |
| 97 | +│ 6 │ 3 │ 4.8 │ 1.8 │ virginica │ |
| 98 | +│ 6.9 │ 3.1 │ 5.4 │ 2.1 │ virginica │ |
| 99 | +│ 6.7 │ 3.1 │ 5.6 │ 2.4 │ virginica │ |
| 100 | +│ 6.9 │ 3.1 │ 5.1 │ 2.3 │ virginica │ |
| 101 | +│ 5.8 │ 2.7 │ 5.1 │ 1.9 │ virginica │ |
| 102 | +│ 6.8 │ 3.2 │ 5.9 │ 2.3 │ virginica │ |
| 103 | +│ 6.7 │ 3.3 │ 5.7 │ 2.5 │ virginica │ |
| 104 | +│ 6.7 │ 3 │ 5.2 │ 2.3 │ virginica │ |
| 105 | +│ 6.3 │ 2.5 │ 5 │ 1.9 │ virginica │ |
| 106 | +│ 6.5 │ 3 │ 5.2 │ 2 │ virginica │ |
| 107 | +│ 6.2 │ 3.4 │ 5.4 │ 2.3 │ virginica │ |
| 108 | +│ 5.9 │ 3 │ 5.1 │ 1.8 │ virginica │ |
| 109 | +│ 150 rows │ │ │ │ │ |
| 110 | +│ (40 shown) │ │ │ │ │ |
| 111 | +└──────────────────────────────────────────────────────────────────────────────────────────────────┘ |
| 112 | +``` |
| 113 | + |
| 114 | +You can also query the remote ORC file directly: |
| 115 | + |
| 116 | +```sql |
| 117 | +SELECT |
| 118 | + * |
| 119 | +FROM |
| 120 | + 'https://github.com/tensorflow/io/raw/master/tests/test_orc/iris.orc' (file_format => 'orc'); |
| 121 | +``` |
| 122 | + |
| 123 | +</StepContent> |
| 124 | +</StepsWrap> |
0 commit comments