diff --git a/.gitignore b/.gitignore
index 27c87cc1..e6224405 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ packages
dist/windows/
_benchmark_data*
*.benchmark-results
+generated-site/
diff --git a/.travis.yml b/.travis.yml
index 07f1415e..d7f8ec46 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,11 +1,136 @@
-language: python
-python:
- - "2.7"
- - "3.6"
-matrix:
+sudo: false
+# Stages run in the order listed: all integration jobs first, then the release jobs.
+stages:
+  - integration
+  - release
+# CACHE_NAME is set per job name, presumably so jobs do not share a cache -- confirm.
+env:
+  global:
+    - CACHE_NAME=${TRAVIS_JOB_NAME}
+
+
+_commands_provider:
+  # Reusable shell commands; each anchor is referenced by alias from the step and job definitions.
+  _test: &_test make test
+
+  _lint: &_lint make lint
+
+  _release: &_release make local-release
+
+  _install_requirements: &_install_requirements make dep
+
+  # https://ttcshelbyville.wordpress.com/2012/12/19/disable-remote-differential-compression-form-the-command-line/
+  _disable_windows_compression: &_disable_windows_compression "powershell Disable-WindowsOptionalFeature -Online -FeatureName MSRDC-Infrastructure"
+
+  # https://travis-ci.community/t/yarn-network-troubles/333/7
+  _disable_windows_defender: &_disable_windows_defender "powershell Set-MpPreference -DisableRealtimeMonitoring \\$true"
+
+
+_steps_provider:
+  # Job phase templates; jobs pull one of these in with the `<<:` merge key.
+  _test: &_step_test
+    # Integration jobs: install dependencies, lint, then run the tests.
+    install:
+      - *_install_requirements
+    before_script: *_lint
+    script: *_test
+
+  _release: &_step_release
+    # Release jobs: install dependencies, then run `make local-release`.
+    install: *_install_requirements
+    script: *_release
+
+
+
+jobs:
include:
- - python: "3.7"
- dist: xenial # Need for python 3.7
-install: pip install -r requirements.txt
-before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics
-script: PYTHONIOENCODING=UTF-8 test/test-all
+    - stage: integration
+      name: py27-macos
+      os: osx
+      language: generic
+      osx_image: xcode7.3
+      env:
+        - PYENV_VERSION=2.7.14
+      before_install: source setup-pyenv.sh  # presumably provisions the PYENV_VERSION interpreter -- script not shown here
+      <<: *_step_test  # merges install / before_script / script from the _step_test template
+      cache:
+        directories:
+          - ${HOME}/.pyenv_cache
+
+    - stage: integration
+      name: py36-macos
+      os: osx
+      language: generic
+      osx_image: xcode7.3
+      env:
+        - PYENV_VERSION=3.6.4
+      before_install: source setup-pyenv.sh
+      <<: *_step_test
+      cache:
+        directories:
+          - ${HOME}/.pyenv_cache
+
+    - stage: integration
+      name: py37-macos
+      os: osx
+      language: generic
+      osx_image: xcode7.3
+      env:
+        - PYENV_VERSION=3.7.3
+      before_install: source setup-pyenv.sh
+      <<: *_step_test
+      cache:
+        directories:
+          - ${HOME}/.pyenv_cache
+
+    - stage: integration
+      name: py27-linux
+      language: python
+      python: "2.7"
+      <<: *_step_test  # merges install / before_script / script from the _step_test template
+
+    - stage: integration
+      name: py36-linux
+      language: python
+      python: "3.6"
+      <<: *_step_test
+
+    - stage: integration
+      name: py37-linux
+      language: python
+      dist: xenial
+      python: "3.7"
+      <<: *_step_test
+
+    - stage: release
+      name: macos
+      os: osx
+      language: generic
+      osx_image: xcode7.3
+      env:
+        - PYENV_VERSION=3.7.3
+      before_install: source setup-pyenv.sh
+      <<: *_step_release  # merges install / script from the _step_release template
+      cache:
+        directories:
+          - ${HOME}/.pyenv_cache
+
+    - stage: release
+      name: linux
+      language: python
+      dist: xenial
+      python: "3.7"
+      <<: *_step_release
+
+    - stage: release
+      name: windows
+      os: windows
+      language: shell
+      env:
+        - PATH=/c/Python37:/c/Python37/Scripts:$PATH
+      before_install:
+        - *_disable_windows_compression
+        - *_disable_windows_defender
+        - choco install make
+        - choco install python --version 3.7.3
+      <<: *_step_release
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..e612a54a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,37 @@
+SHELL := /bin/bash
+# NOTE(review): make expands `$0` itself (to an empty value) before any shell runs; PROJECT_NAME is not used below.
+PROJECT_NAME=$(shell dirname "$0")
+ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+# Every target here is a command, not a file to build (note that test/ is a real directory).
+.PHONY: ci help dep lint test release local-release
+.DEFAULT_GOAL := ci
+
+ci: lint test ## Equivalent to 'make lint test'
+
+help: ## Show this help message.
+
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+dep: ## Install the dependent libraries.
+
+	pip install -r test-requirements.txt
+	pip install -e .
+
+lint: dep ## Run lint validations.
+
+	flake8 q/ --count --select=E901,E999,F821,F822,F823 --show-source --statistics
+
+test: dep ## Run the unit tests.
+
+	test/test-all
+	## TODO Bring back pytest
+	## py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir .
+
+release: ## Run release
+	pip install py-ci
+	pyci release --no-wheel-publish --wheel-universal
+
+local-release: ## Install py-ci and run ./do-manual-release.sh
+	pip install py-ci
+	./do-manual-release.sh
+
diff --git a/README.markdown b/README.markdown
index 9477de3d..c8802d60 100644
--- a/README.markdown
+++ b/README.markdown
@@ -10,7 +10,7 @@ q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It c
## Installation.
Extremely simple.
-Instructions for all OSs are [here](http://harelba.github.io/q/install.html).
+Instructions for all OSs are [here](http://harelba.github.io/q/#installation).
## Examples
@@ -20,18 +20,19 @@ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3"
ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3"
```
-Go [here](http://harelba.github.io/q/examples.html) for more examples.
+Go [here](http://harelba.github.io/q/#examples) for more examples.
## Python API
A development branch for exposing q's capabilities as a Python module can be viewed here, along with examples of the alpha version of the API.
Existing functionality as a command-line tool will not be affected by this. Your input will be most appreciated.
-## Change log
-Click [here](http://harelba.github.io/q/changelog.html) to see the change log.
-
## Contact
Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course.
-Harel Ben-Attia, harelba@gmail.com, [@harelba](https://twitter.com/harelba) on Twitter
+Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/)
+
+Twitter [@harelba](https://twitter.com/harelba)
+
+Email [harelba@gmail.com](mailto:harelba@gmail.com)
q on twitter: #qtextasdata
diff --git a/bin/__version__.py b/bin/__version__.py
new file mode 100755
index 00000000..f9aa4a0a
--- /dev/null
+++ b/bin/__version__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+q_version = '2.0.12'
+
+
+if __name__ == '__main__':
+ print(q_version)
diff --git a/bin/q b/bin/q.py
similarity index 97%
rename from bin/q
rename to bin/q.py
index fbd58791..11c76e50 100755
--- a/bin/q
+++ b/bin/q.py
@@ -30,8 +30,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
-q_version = "2.0.6"
+from .__version__ import q_version
__all__ = [ 'QTextAsData' ]
@@ -476,16 +475,18 @@ def __init__(self, mode, expected_column_count, input_delimiter, skip_header=Fal
self.rows = []
self.skip_header = skip_header
self.header_row = None
+ self.header_row_filename = None
self.expected_column_count = expected_column_count
self.input_delimiter = input_delimiter
self.disable_column_type_detection = disable_column_type_detection
- def analyze(self, col_vals):
+ def analyze(self, filename, col_vals):
if self.inferred:
raise Exception("Already inferred columns")
if self.skip_header and self.header_row is None:
self.header_row = col_vals
+ self.header_row_filename = filename
else:
self.rows.append(col_vals)
@@ -905,17 +906,36 @@ def _pre_populate(self,dialect):
mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin)
self.materialized_file_dict[filename] = mfs
+ def _should_skip_extra_headers(self, filenumber, filename, mfs, col_vals):
+ if not self.skip_header:
+ return False
+
+ if filenumber == 0:
+ return False
+
+ header_already_exists = self.column_inferer.header_row is not None
+
+ is_extra_header = self.skip_header and mfs.lines_read == 1 and header_already_exists
+
+ if is_extra_header:
+ if tuple(self.column_inferer.header_row) != tuple(col_vals):
+ raise BadHeaderException("Extra header {} in file {} mismatches original header {} from file {}. Table name is {}".format(",".join(col_vals),mfs.filename,",".join(self.column_inferer.header_row),self.column_inferer.header_row_filename,self.filenames_str))
+
+ return is_extra_header
+
def _populate(self,dialect,stop_after_analysis=False):
total_data_lines_read = 0
# For each match
- for filename in self.materialized_file_list:
+ for filenumber,filename in enumerate(self.materialized_file_list):
mfs = self.materialized_file_dict[filename]
try:
try:
for col_vals in mfs.read_file_using_csv():
- self._insert_row(col_vals)
+ if self._should_skip_extra_headers(filenumber,filename,mfs,col_vals):
+ continue
+ self._insert_row(filename, col_vals)
if stop_after_analysis and self.column_inferer.inferred:
return
if mfs.lines_read == 0 and self.skip_header:
@@ -937,7 +957,7 @@ def _populate(self,dialect,stop_after_analysis=False):
if not self.table_created:
self.column_inferer.force_analysis()
- self._do_create_table()
+ self._do_create_table(filename)
if total_data_lines_read == 0:
@@ -960,20 +980,20 @@ def populate(self,dialect,stop_after_analysis=False):
self.state = TableCreatorState.FULLY_READ
return
- def _flush_pre_creation_rows(self):
+ def _flush_pre_creation_rows(self, filename):
for i, col_vals in enumerate(self.pre_creation_rows):
if self.skip_header and i == 0:
# skip header line
continue
- self._insert_row(col_vals)
+ self._insert_row(filename, col_vals)
self._flush_inserts()
self.pre_creation_rows = []
- def _insert_row(self, col_vals):
+ def _insert_row(self, filename, col_vals):
# If table has not been created yet
if not self.table_created:
# Try to create it along with another "example" line of data
- self.try_to_create_table(col_vals)
+ self.try_to_create_table(filename, col_vals)
# If the table is still not created, then we don't have enough data, just
# store the data and return
@@ -1069,19 +1089,19 @@ def _flush_inserts(self):
# print self.db.execute_and_fetch(self.db.generate_end_transaction())
self.buffered_inserts = []
- def try_to_create_table(self, col_vals):
+ def try_to_create_table(self, filename, col_vals):
if self.table_created:
raise Exception('Table is already created')
# Add that line to the column inferer
- result = self.column_inferer.analyze(col_vals)
+ result = self.column_inferer.analyze(filename, col_vals)
# If inferer succeeded,
if result:
- self._do_create_table()
+ self._do_create_table(filename)
else:
pass # We don't have enough information for creating the table yet
- def _do_create_table(self):
+ def _do_create_table(self,filename):
# Then generate a temp table name
self.table_name = self.db.generate_temp_table_name()
# Get the column definition dict from the inferer
@@ -1101,7 +1121,7 @@ def _do_create_table(self):
self.db.execute_and_fetch(create_table_stmt)
# Mark the table as created
self.table_created = True
- self._flush_pre_creation_rows()
+ self._flush_pre_creation_rows(filename)
def drop_table(self):
if self.table_created:
@@ -1122,7 +1142,8 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter):
def print_credentials():
print("q version %s" % q_version, file=sys.stderr)
- print("Copyright (C) 2012-2017 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr)
+ print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr)
+ print("Copyright (C) 2012-2019 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr)
print("http://harelba.github.io/q/", file=sys.stderr)
print(file=sys.stderr)
@@ -1403,7 +1424,7 @@ def _execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-'
msg = str(e)
error = QError(e,"query error: %s" % msg,1)
if "no such column" in msg and effective_input_params.skip_header:
- warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names'))
+                warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names. Another issue might be that the file contains a BOM. Files that are encoded with UTF8 and contain a BOM can be read by specifying `-e utf-8-sig` in the command line. Support for non-UTF8 encoding will be provided in the future.'))  # utf-8-sig is the codec name that consumes a leading UTF-8 BOM
except ColumnCountMismatchException as e:
error = QError(e,e.msg,2)
except (UnicodeDecodeError, UnicodeError) as e:
diff --git a/dist/create-rpm b/dist/create-rpm
index db1a255b..8c247f99 100755
--- a/dist/create-rpm
+++ b/dist/create-rpm
@@ -40,12 +40,12 @@ then
exit 1
fi
-curl -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG"
+curl -f -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG"
mkdir -p ${rpm_build_area}/SOURCES
pushd ${rpm_build_area}/SOURCES >/dev/null
tar xvzf ./q.tar.gz --strip-components=1
rm -vf ./q.tar.gz
-curl -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q"
+curl -f -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q"
chmod +x ./bin/q
popd >/dev/null
find ${rpm_build_area}/ -ls
diff --git a/do-manual-release.sh b/do-manual-release.sh
new file mode 100755
index 00000000..33e68ada
--- /dev/null
+++ b/do-manual-release.sh
@@ -0,0 +1,29 @@
+#!/bin/bash -x
+
+set -e
+
+VERSION=$(bin/__version__.py)
+
+echo "Packing binary for $TRAVIS_OS_NAME"
+
+if [[ "$TRAVIS_OS_NAME" == "osx" || "$TRAVIS_OS_NAME" == "linux" ]]
+then
+ echo "Packing $TRAVIS_OS_NAME installer - packing binary"
+ pyci pack --repo harelba/q --sha $VERSION binary
+ echo "Packing $TRAVIS_OS_NAME installer - uploading"
+ pyci github upload-asset --asset q-$(uname -m)-$(uname -s) --release $VERSION
+else
+ echo "Packing windows installer - packing binary"
+ pyci pack --repo harelba/q --sha $VERSION binary
+ echo "Packing windows installer - listing files"
+ find `pwd` -ls | grep -v \.git/
+ echo "Packing windows installer - packing nsis"
+ BINARY_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows.exe"
+ pyci pack nsis --program-files-dir q-TextAsData --binary-path $BINARY_LOCATION --version ${VERSION}.0
+ echo "Packing windows installer - uploading"
+ pyci github upload-asset --asset $BINARY_LOCATION --release $VERSION
+ SETUP_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows-installer.exe"
+ pyci github upload-asset --asset $SETUP_LOCATION --release $VERSION
+fi
+
+echo "done"
diff --git a/mkdocs/docs/.DS_Store b/mkdocs/docs/.DS_Store
new file mode 100644
index 00000000..6f61d6dc
Binary files /dev/null and b/mkdocs/docs/.DS_Store differ
diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md
new file mode 100644
index 00000000..b0e09e45
--- /dev/null
+++ b/mkdocs/docs/about.md
@@ -0,0 +1,8 @@
+# About
+
+### Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/)
+
+### Twitter [@harelba](https://twitter.com/harelba)
+
+### Email [harelba@gmail.com](mailto:harelba@gmail.com)
+
diff --git a/mkdocs/docs/fsg9b9b1.txt b/mkdocs/docs/fsg9b9b1.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/mkdocs/docs/google0efeb4ff0a886e81.html b/mkdocs/docs/google0efeb4ff0a886e81.html
new file mode 100644
index 00000000..24947e8a
--- /dev/null
+++ b/mkdocs/docs/google0efeb4ff0a886e81.html
@@ -0,0 +1 @@
+google-site-verification: google0efeb4ff0a886e81.html
\ No newline at end of file
diff --git a/mkdocs/docs/img/bg_hr.png b/mkdocs/docs/img/bg_hr.png
new file mode 100644
index 00000000..7973bd69
Binary files /dev/null and b/mkdocs/docs/img/bg_hr.png differ
diff --git a/mkdocs/docs/img/blacktocat.png b/mkdocs/docs/img/blacktocat.png
new file mode 100644
index 00000000..6e264fe5
Binary files /dev/null and b/mkdocs/docs/img/blacktocat.png differ
diff --git a/mkdocs/docs/img/icon_download.png b/mkdocs/docs/img/icon_download.png
new file mode 100644
index 00000000..a2a287f6
Binary files /dev/null and b/mkdocs/docs/img/icon_download.png differ
diff --git a/mkdocs/docs/img/q-logo.png b/mkdocs/docs/img/q-logo.png
new file mode 100644
index 00000000..ac9599c3
Binary files /dev/null and b/mkdocs/docs/img/q-logo.png differ
diff --git a/mkdocs/docs/img/q-logo1.ico b/mkdocs/docs/img/q-logo1.ico
new file mode 100644
index 00000000..427a0d96
Binary files /dev/null and b/mkdocs/docs/img/q-logo1.ico differ
diff --git a/mkdocs/docs/img/q-logo1.png b/mkdocs/docs/img/q-logo1.png
new file mode 100644
index 00000000..10380ecf
Binary files /dev/null and b/mkdocs/docs/img/q-logo1.png differ
diff --git a/mkdocs/docs/img/sprite_download.png b/mkdocs/docs/img/sprite_download.png
new file mode 100644
index 00000000..f2babd57
Binary files /dev/null and b/mkdocs/docs/img/sprite_download.png differ
diff --git a/mkdocs/docs/img/sprite_download3.png b/mkdocs/docs/img/sprite_download3.png
new file mode 100644
index 00000000..9fd451ac
Binary files /dev/null and b/mkdocs/docs/img/sprite_download3.png differ
diff --git a/mkdocs/docs/img/sprite_download4.png b/mkdocs/docs/img/sprite_download4.png
new file mode 100644
index 00000000..db6e5186
Binary files /dev/null and b/mkdocs/docs/img/sprite_download4.png differ
diff --git a/mkdocs/docs/img/torii-favicon.ico b/mkdocs/docs/img/torii-favicon.ico
new file mode 100644
index 00000000..86da98b7
Binary files /dev/null and b/mkdocs/docs/img/torii-favicon.ico differ
diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md
new file mode 100644
index 00000000..777d4cc6
--- /dev/null
+++ b/mkdocs/docs/index.md
@@ -0,0 +1,381 @@
+# q - Run SQL directly on CSV or TSV files
+
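+[![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub%20Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/)
+[![GitHub Forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub%20Forks&maxAge=600)](https://GitHub.com/harelba/q/network/)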
+
+
+## Overview
+q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files).
+
+q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and column type detection, and provides full support for multiple encodings.
+
+``` bash
+q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3"
+```
+
+``` bash
+ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3"
+```
+
+Look at some examples [here](#examples), or just download the tool using the links in the [installation](#installation) below and play with it.
+
+| | |
+|:--------------------------------------:|:-----------------------------------------------:|
+| 完全支持所有的字符编码 | すべての文字エンコーディングを完全にサポート |
+| 모든 문자 인코딩이 완벽하게 지원됩니다 | все кодировки символов полностью поддерживаются |
+
+**Non-english users:** q fully supports all types of encoding. Use `-e data-encoding` to set the input data encoding, `-Q query-encoding` to set the query encoding, and use `-E output-encoding` to set the output encoding. Sensible defaults are in place for all three parameters. Please contact me if you encounter any issues and I'd be glad to help.
+
+**Files with BOM:** Files which contain a BOM ([Byte Order Mark](https://en.wikipedia.org/wiki/Byte_order_mark)) are not properly supported inside python's csv module. q contains a workaround that allows reading UTF8 files which contain a BOM - Use `-e utf-8-sig` for this. I plan to separate the BOM handling from the encoding itself, which would allow to support BOMs for all encodings.
+
+## Installation
+
+| Format | Instructions | Comments |
+|:---|:---|:---|
+|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.9/q)|Download the executable from the link on the left, make it executable, and use it.|`brew install q` currently downloads the older version `1.7.1`. I'll update it to install the new version soon|
+|[RPM Package](https://github.com/harelba/packages-for-q/raw/master/rpms/q-text-as-data-2.0.9-1.x86_64.rpm)|Run `rpm -ivh <package-filename>` or `rpm -U <package-filename>` if you already have an older version of q.|A man page is available for this release. Just enter `man q`.|
+|[DEB Package](https://github.com/harelba/packages-for-q/raw/master/deb/q-text-as-data_2.0.9-2_amd64.deb)|Run `sudo dpkg -i <package-filename>`|A man page is available for this release. Just enter `man q`.|
+|[Windows Installer](https://github.com/harelba/packages-for-q/raw/master/windows/setup-q-2.0.9.exe)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.|
+|[tar.gz](https://github.com/harelba/q/archive/2.0.9.tar.gz)|Full source file tree for latest stable version||
+|[zip](https://github.com/harelba/q/archive/2.0.9.zip)|Full source file tree for the latest stable version||
+
+**Older versions can be downloaded [here](https://github.com/harelba/packages-for-q). Please let me know if you plan on using an older version, and why - I know of no reason to use any of them.**
+
+## Requirements
+As of version `2.0.9`, there's no need for any external dependency. Python itself (3.7), and any needed libraries are self-contained inside the installation, isolated from the rest of your system.
+
+## Usage
+
+``` bash
+q <flags> "<query>"
+
+ Simplest execution is `q "SELECT * FROM myfile"` which prints the entire file.
+```
+
+q allows performing SQL-like statements on tabular text data. Its purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data.
+
+Query should be an SQL-like query which contains *filenames instead of table names* (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). Multiple files can be used as one table by either writing them as `filename1+filename2+...` or by using shell wildcards (e.g. `my_files*.csv`).
+
+Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. `q "SELECT c3,c8 from ..."`).
+
+Use `-d` to specify the input delimiter.
+
+Column types are auto detected by the tool, no casting is needed. Note that there's a flag `--as-text` which forces all columns to be treated as text columns.
+
+Please note that column names that include spaces need to be used in the query with back-ticks, as per the sqlite standard.
+
+Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed.
+
+All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). Take a look at the [limitations](#limitations) section below for some rarely-used use cases which are not fully supported.
+
+### Query
+Each parameter that q gets is a full SQL query. All queries are executed one after another, outputting the results to standard output. Note that data loading is done only once, so when passing multiple queries on the same command-line, only the first one will take a long time. The rest will start running almost instantaneously, since all the data will already have been loaded. Remember to double-quote each of the queries - Each parameter is a full SQL query.
+
+Any standard SQL expression, condition (both WHERE and HAVING), GROUP BY, ORDER BY etc. are allowed.
+
+JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs.
+
+The SQL syntax itself is sqlite's syntax. For details look at http://www.sqlite.org/lang.html or search the net for examples.
+
+NOTE: Full type detection is implemented, so there is no need for any casting or anything.
+
+NOTE2: When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names.
+
+### Flags
+
+``` bash
+Usage:
+ q allows performing SQL-like statements on tabular text data.
+
+ Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line.
+
+ Basic usage is q "<sql-like query>" where table names are just regular file names (Use - to read from standard input)
+ When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN.
+
+ Column types are detected automatically. Use -A in order to see the column name/type analysis.
+
+ Delimiter can be set using the -d (or -t) option. Output delimiter can be set using -D
+
+ All sqlite3 SQL constructs are supported.
+
+ Examples:
+
+ Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1"
+ This example would print a count of each unique permission string in the current folder.
+
+ Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -"
+ This example would provide the average and the sum of the numbers in the range 1 to 1000
+
+ Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc"
+ This example will output the total size in MB per user+group in the /tmp subtree
+
+
+ See the help or https://github.com/harelba/q/ for more details.
+
+
+Options:
+ -h, --help show this help message and exit
+ -v, --version Print version
+ -V, --verbose Print debug info in case of problems
+ -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME
+ Save database to an sqlite database file
+ --save-db-to-disk-method=SAVE_DB_TO_DISK_METHOD
+ Method to use to save db to disk. 'standard' does not
+ require any deps, 'fast' currenty requires manually
+ running `pip install sqlitebck` on your python
+ installation. Once packing issues are solved, the fast
+ method will be the default.
+
+ Input Data Options:
+ -H, --skip-header Skip header row. This has been changed from earlier
+ version - Only one header row is supported, and the
+ header row is used for column naming
+ -d DELIMITER, --delimiter=DELIMITER
+ Field delimiter. If none specified, then space is used
+ as the delimiter.
+ -t, --tab-delimited
+ Same as -d <tab>. Just a shorthand for handling
+ standard tab delimited file You can use $'\t' if you
+ want (this is how Linux expects to provide tabs in the
+ command line
+ -e ENCODING, --encoding=ENCODING
+ Input file encoding. Defaults to UTF-8. set to none
+ for not setting any encoding - faster, but at your own
+ risk...
+ -z, --gzipped Data is gzipped. Useful for reading from stdin. For
+ files, .gz means automatic gunzipping
+ -A, --analyze-only Analyze sample input and provide information about
+ data types
+ -m MODE, --mode=MODE
+ Data parsing mode. fluffy, relaxed and strict. In
+ strict mode, the -c column-count parameter must be
+ supplied as well
+ -c COLUMN_COUNT, --column-count=COLUMN_COUNT
+ Specific column count when using relaxed or strict
+ mode
+ -k, --keep-leading-whitespace
+ Keep leading whitespace in values. Default behavior
+ strips leading whitespace off values, in order to
+ provide out-of-the-box usability for simple use cases.
+ If you need to preserve whitespace, use this flag.
+ --disable-double-double-quoting
+ Disable support for double double-quoting for escaping
+ the double quote character. By default, you can use ""
+ inside double quoted fields to escape double quotes.
+ Mainly for backward compatibility.
+ --disable-escaped-double-quoting
+ Disable support for escaped double-quoting for
+ escaping the double quote character. By default, you
+ can use \" inside double quoted fields to escape
+ double quotes. Mainly for backward compatibility.
+ --as-text Don't detect column types - All columns will be
+ treated as text columns
+ -w INPUT_QUOTING_MODE, --input-quoting-mode=INPUT_QUOTING_MODE
+ Input quoting mode. Possible values are all, minimal
+ and none. Note the slightly misleading parameter name,
+ and see the matching -W parameter for output quoting.
+ -M MAX_COLUMN_LENGTH_LIMIT, --max-column-length-limit=MAX_COLUMN_LENGTH_LIMIT
+ Sets the maximum column length.
+ -U, --with-universal-newlines
+ Expect universal newlines in the data. Limitation: -U
+ works only with regular files for now, stdin or .gz
+ files are not supported yet.
+
+ Output Options:
+ -D OUTPUT_DELIMITER, --output-delimiter=OUTPUT_DELIMITER
+ Field delimiter for output. If none specified, then
+ the -d delimiter is used if present, or space if no
+ delimiter is specified
+ -T, --tab-delimited-output
+ Same as -D <tab>. Just a shorthand for outputting tab
+ delimited output. You can use -D $'\t' if you want.
+ -O, --output-header
+ Output header line. Output column-names are determined
+ from the query itself. Use column aliases in order to
+ set your column names in the query. For example,
+ 'select name FirstName,value1/value2 MyCalculation
+ from ...'. This can be used even if there was no
+ header in the input.
+ -b, --beautify Beautify output according to actual values. Might be
+ slow...
+ -f FORMATTING, --formatting=FORMATTING
+ Output-level formatting, in the format X=fmt,Y=fmt
+ etc, where X,Y are output column numbers (e.g. 1 for
+ first SELECT column etc.
+ -E OUTPUT_ENCODING, --output-encoding=OUTPUT_ENCODING
+ Output encoding. Defaults to 'none', leading to
+ selecting the system/terminal encoding
+ -W OUTPUT_QUOTING_MODE, --output-quoting-mode=OUTPUT_QUOTING_MODE
+ Output quoting mode. Possible values are all, minimal,
+ nonnumeric and none. Note the slightly misleading
+ parameter name, and see the matching -w parameter for
+ input quoting.
+
+ Query Related Options:
+ -q QUERY_FILENAME, --query-filename=QUERY_FILENAME
+ Read query from the provided filename instead of the
+ command line, possibly using the provided query
+ encoding (using -Q).
+ -Q QUERY_ENCODING, --query-encoding=QUERY_ENCODING
+ query text encoding. Experimental. Please send your
+ feedback on this
+```
+
+## Examples
+The `-H` flag in the examples below signifies that the file has a header row which is used for naming columns.
+
+The `-t` flag is just a shortcut for saying that the file is a tab-separated file (any delimiter is supported - Use the `-d` flag).
+
+Queries are given using upper case for clarity, but actual query keywords such as SELECT and WHERE are not really case sensitive.
+
+Example List:
+
+* [Example 1 - COUNT DISTINCT values of specific field (uuid of clicks data)](#example-1)
+* [Example 2 - Filter numeric data, controlling ORDERing and LIMITing output](#example-2)
+* [Example 3 - Illustrate GROUP BY](#example-3)
+* [Example 4 - More complex GROUP BY (group by time expression)](#example-4)
+* [Example 5 - Read input from standard input](#example-5)
+* [Example 6 - Use column names from header row](#example-6)
+* [Example 7 - JOIN two files](#example-7)
+
+### Example 1
+Perform a COUNT DISTINCT values of specific field (uuid of clicks data).
+
+``` bash
+q -H -t "SELECT COUNT(DISTINCT(uuid)) FROM ./clicks.csv"
+```
+Output
+``` bash
+229
+```
+### Example 2
+Filter numeric data, controlling ORDERing and LIMITing output
+
+Note that q understands that the column is numeric and filters according to its numeric value (real numeric value comparison, not string comparison).
+
+``` bash
+q -H -t "SELECT request_id,score FROM ./clicks.csv WHERE score > 0.7 ORDER BY score DESC LIMIT 5"
+```
+Output:
+``` bash
+2cfab5ceca922a1a2179dc4687a3b26e 1.0
+f6de737b5aa2c46a3db3208413a54d64 0.986665809568
+766025d25479b95a224bd614141feee5 0.977105183282
+2c09058a1b82c6dbcf9dc463e73eddd2 0.703255121794
+```
+
+### Example 3
+Illustrate GROUP BY
+
+``` bash
+q -t -H "SELECT hashed_source_machine,count(*) FROM ./clicks.csv GROUP BY hashed_source_machine"
+```
+Output:
+``` bash
+47d9087db433b9ba.domain.com 400000
+```
+
+### Example 4
+More complex GROUP BY (group by time expression)
+
+``` bash
+q -t -H "SELECT strftime('%H:%M',date_time) hour_and_minute,count(*) FROM ./clicks.csv GROUP BY hour_and_minute"
+```
+Output:
+``` bash
+07:00 138148
+07:01 140026
+07:02 121826
+```
+
+### Example 5
+Read input from standard input
+
+Calculates the total size per user/group in the /tmp subtree.
+
+``` bash
+sudo find /tmp -ls | q "SELECT c5,c6,sum(c7)/1024.0/1024 AS total FROM - GROUP BY c5,c6 ORDER BY total desc"
+```
+Output:
+``` bash
+mapred hadoop 304.00390625
+root root 8.0431451797485
+smith smith 4.34389972687
+```
+
+### Example 6
+Use column names from header row
+
+Calculate the top 3 user ids with the largest number of owned processes, sorted in descending order.
+
+Note the usage of the autodetected column name UID in the query.
+
+``` bash
+ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3"
+```
+Output:
+``` bash
+root 152
+harel 119
+avahi 2
+```
+
+### Example 7
+JOIN two files
+
+The following command joins an ls output (exampledatafile) and a file containing rows of group-name,email (group-emails-example) and provides a row of filename,email for each of the emails of the group. For brevity of output, there is also a filter for a specific filename called ppp which is achieved using a WHERE clause.
+
+``` bash
+q "SELECT myfiles.c8,emails.c2 FROM exampledatafile myfiles JOIN group-emails-example emails ON (myfiles.c4 = emails.c1) WHERE myfiles.c8 = 'ppp'"
+```
+Output:
+``` bash
+ppp dip.1@otherdomain.com
+ppp dip.2@otherdomain.com
+```
+
+You can see that the ppp filename appears twice, each time matched to one of the emails of the group dip to which it belongs. Take a look at the files `exampledatafile` and `group-emails-example` for the data.
+
+Column name detection is supported for JOIN scenarios as well. Just specify `-H` in the command line and make sure that the source files contain the header rows.
+
+## Implementation
+The current implementation is written in Python using an in-memory database, in order to prevent the need for external dependencies. The implementation itself supports SELECT statements, including JOINs (Subqueries are supported only in the WHERE clause for now). If you want to do further analysis on the data, you can use the `--save-db-to-disk` option to write the resulting tables to an sqlite database file, and then use `sqlite3` in order to perform queries on the data separately from q itself.
+
+Please note that there are currently no checks or bounds on data size - it's up to the user to make sure things don't get too big.
+
+Please make sure to read the [limitations](#limitations) section as well.
+
+## Development
+
+### Tests
+The code includes a test suite runnable through `test/test-all`. If you're planning on sending a pull request, I'd appreciate it if you could make sure that it doesn't fail.
+
+## Limitations
+Here's the list of known limitations. Please contact me if you have a use case that needs any of those missing capabilities.
+
+* `FROM <subquery>` is not supported
+* Common Table Expressions (CTE) are not supported
+* Spaces in file names are not supported. Use stdin for piping the data into q, or rename the file
+* Some rare cases of subqueries are not supported yet.
+
+## Rationale
+Have you ever stared at a text file on the screen, hoping it would have been a database so you could ask anything you want about it? I had that feeling many times, and I've finally understood that it's not the database that I want. It's the language - SQL.
+
+SQL is a declarative language for data, and as such it allows me to define what I want without caring about how exactly it's done. This is the reason SQL is so powerful, because it treats data as data and not as bits and bytes (and chars).
+
+The goal of this tool is to provide a bridge between the world of text files and of SQL.
+
+### Why aren't other Linux tools enough?
+The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of the job. This tool adds the declarative power of SQL to the Linux toolset, without losing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time.
+
+One additional thing to note is that many Linux tools treat text as text and not as data. In that sense, you can look at q as a meta-tool which provides access to all the data-related tools that SQL provides (e.g. expressions, ordering, grouping, aggregation etc.).
+
+### Philosophy
+This tool has been designed with general Linux/Unix design principles in mind. If you're interested in these general design principles, read this amazing [book](http://catb.org/~esr/writings/taoup/) and specifically [this part](http://catb.org/~esr/writings/taoup/html/ch01s06.html). If you believe that the way this tool works goes strongly against any of the principles, I would love to hear your view about it.
+
+## Future
+
+* Expose q as a Python module - Mostly implemented. Requires some internal API changes with regard to handling stdin before exposing it.
+* Allow to use a distributed backend for scaling the computations
+
+
diff --git a/mkdocs/docs/js/google-analytics.js b/mkdocs/docs/js/google-analytics.js
new file mode 100644
index 00000000..4c0829d7
--- /dev/null
+++ b/mkdocs/docs/js/google-analytics.js
@@ -0,0 +1,59 @@
+// Monitor all download links in GA
+
+var dlCnt = 0;
+
+// Attach a click handler to anchor `a` that reports a GA "perform download"
+// event when its href (query string ignored) points at a release artifact
+// (rpm, deb, Darwin single binary, source archive or Windows exe). Anchors
+// that do not match are left untouched. Increments the global dlCnt.
+function GAizeDownloadLink(a) {
+    var url = a.href.split("?")[0];
+    var url_test = url.match(/^https?:\/\/.+(\/rpms\/.*\.rpm|\/deb\/.*\.deb|\/single-binary\/Darwin\/.*\/q|\/archive\/.*\.tar\.gz|\/archive\/.*\.zip|\/windows\/.*\.exe)$/i);
+    if (!url_test) {
+        return;
+    }
+
+    console.log("Converting download link to be GA aware: " + url);
+    // The single capture group is the artifact path; it becomes the event label.
+    a.event_action = url_test[1] || 'unknown_action';
+    dlCnt = dlCnt + 1;
+
+    a.onclick = function() {
+        console.log("Sending GA event for link " + url);
+        var that = this;
+        gtag('event','perform download', { 'event_category': 'Downloads', 'event_label': 'Download ' + this.event_action , 'value': 1 });
+        // Navigate manually after a short delay (presumably so the event can be sent first).
+        setTimeout(function() {
+            location.href = that.href;
+        }, 500);
+        return false;
+    };
+}
+
+function GAizeTOCLink(l) {
+    // Report a GA "navigate" event when link `l` is clicked, labelled with its #fragment.
+    l.onclick = function() {
+        var url_test = l.href.match(/^https?:\/\/.+(#.*)$/i);
+        if (!url_test) { return true; }  // no #fragment: nothing to report, let the browser navigate
+        var toc_name = url_test[1];
+        var that = this;
+        console.log("Sending GA event for toc link " + this.href);
+        gtag('event','navigate', { 'event_category': 'Navigation', 'event_label': 'go to ' + toc_name, 'value': 1 });
+        setTimeout(function() {
+            location.href = that.href;
+        }, 250);
+        return false;
+    };
+}
+
+window.onload = function() {  // instrument links once the whole page has loaded
+    var anchors = document.getElementsByTagName('a');
+    for (var i = 0; i < anchors.length; i++) {  // `var`: the counter previously leaked as a global
+        GAizeDownloadLink(anchors[i]);
+    }
+    var toc_links = document.querySelectorAll('div.md-sidebar[data-md-component=toc] a.md-nav__link');
+    for (var j = 0; j < toc_links.length; j++) {
+        GAizeTOCLink(toc_links[j]);
+    }
+    console.log("Converted " + dlCnt + " links to be GA aware");
+};
diff --git a/mkdocs/docs/stylesheets/extra.css b/mkdocs/docs/stylesheets/extra.css
new file mode 100644
index 00000000..74eb969f
--- /dev/null
+++ b/mkdocs/docs/stylesheets/extra.css
@@ -0,0 +1,38 @@
+
+/* Green-on-black colours for preformatted blocks. */
+div.md-content pre,
+.md-typeset code pre {
+    background-color: black;
+    color: #41FF00;
+}
+
+/* Inline code inside paragraphs uses dark text instead of the green. */
+.md-typeset p code {
+    color: rgba(0,0,0,.87);
+}
+
+.md-typeset code.bash {
+    color: #41FF00;
+}
+
+.md-typeset__scrollwrap {
+    text-align: center;
+}
+
+/* Numeric opacity: the percentage form is dropped as invalid by older browsers. */
+.md-typeset .headerlink {
+    opacity: 0.5;
+}
+
+article.md-content__inner.md-typeset>p {
+    text-align: center;
+}
+
+/* Navigation links: colour and weight depend on the data-md-state attribute. */
+.md-nav__link[data-md-state=blur] {
+    color: rgba(0.3,0.5,0.4,.4); /* NOTE(review): channels are on a 0-255 scale, so this is near-black at 40% alpha - confirm intent */
+}
+
+.md-nav__link[data-md-state=current] {
+    font-weight: 700;
+}
diff --git a/mkdocs/generate-web-site.sh b/mkdocs/generate-web-site.sh
new file mode 100755
index 00000000..0014729b
--- /dev/null
+++ b/mkdocs/generate-web-site.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Builds the documentation site with mkdocs into ./generated-site.
+mkdocs build -c -s -d ./generated-site  # -c: clean output dir first, -s: strict mode, -d: output directory
diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml
new file mode 100644
index 00000000..a47cdf34
--- /dev/null
+++ b/mkdocs/mkdocs.yml
@@ -0,0 +1,48 @@
+site_name: q - Text as Data
+site_url: http://harelba.github.io/q/
+repo_url: http://github.com/harelba/q
+edit_uri: ""
+site_description: Text as Data - q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files).
+site_author: Harel Ben-Attia
+copyright: 'Copyright © 2012-2019 Harel Ben-Attia'
+google_analytics:
+ - "UA-48316355-1"
+ - "auto"
+nav:
+ - Home: index.md
+ - About: about.md
+theme:
+  name: material
+  language: 'en'
+  palette:
+    primary: purple
+    accent: amber
+  font:  # mkdocs-material reads `font` (singular); the former `fonts` key was not picked up
+    text: 'Roboto'
+    code: 'Roboto Mono'
+  favicon: 'img/q-logo1.ico'
+  logo: 'img/q-logo1.ico'
+  custom_dir: 'theme'
+extra:
+ social:
+ - type: 'github'
+ link: 'https://github.com/harelba'
+ - type: 'twitter'
+ link: 'https://twitter.com/harelba'
+ - type: 'linkedin'
+ link: 'https://www.linkedin.com/in/harelba'
+extra_css:
+ - 'stylesheets/extra.css'
+extra_javascript:
+ - 'js/google-analytics.js'
+markdown_extensions:
+ - meta
+ - toc:
+ permalink: true
+ - tables
+ - fenced_code
+ - admonition
+ # - codehilite
+
+
+
diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt
new file mode 100644
index 00000000..b9a11148
--- /dev/null
+++ b/mkdocs/requirements.txt
@@ -0,0 +1,28 @@
+Click==7.0
+Deprecated==1.2.7
+Jinja2==2.10.3
+Markdown==3.1.1
+MarkupSafe==1.1.1
+PyGithub==1.45
+PyJWT==1.7.1
+PyYAML==5.3
+Pygments==2.5.2
+certifi==2019.11.28
+chardet==3.0.4
+htmlmin==0.1.12
+idna==2.8
+jsmin==2.2.2
+livereload==2.6.1
+mkdocs-bootstrap4==0.1.2
+mkdocs-bootswatch==1.0
+mkdocs-git-committers-plugin==0.1.8
+mkdocs-material==4.6.0
+mkdocs-minify-plugin==0.2.1
+mkdocs==1.0.4
+pep562==1.0
+pymdown-extensions==6.2.1
+requests==2.22.0
+six==1.14.0
+tornado==6.0.3
+urllib3==1.25.8
+wrapt==1.11.2
diff --git a/mkdocs/theme/main.html b/mkdocs/theme/main.html
new file mode 100644
index 00000000..8fd1489e
--- /dev/null
+++ b/mkdocs/theme/main.html
@@ -0,0 +1,28 @@
+{% extends "base.html" %}
+
+{% block analytics %}
+
+{% set analytics = config.google_analytics %}
+
+
+{% endblock %}
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..9d60edec
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+log_print = True
diff --git a/setup-pyenv.sh b/setup-pyenv.sh
new file mode 100644
index 00000000..6b29d86d
--- /dev/null
+++ b/setup-pyenv.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# NOTE: This script needs to be sourced so it can modify the environment.
+#
+# Environment variables that can be set:
+# - PYENV_VERSION
+# Python to install [required]
+# - PYENV_VERSION_STRING
+# String to `grep -F` against the output of `python --version` to validate
+# that the correct Python was installed (recommended) [default: none]
+# - PYENV_ROOT
+# Directory in which to install pyenv [default: ~/.travis-pyenv]
+# - PYENV_RELEASE
+# Release tag of pyenv to download [default: clone from master]
+# - PYENV_CACHE_PATH
+# Directory where full Python builds are cached (i.e., for Travis)
+
+# PYENV_ROOT is exported because pyenv uses it
+export PYENV_ROOT="${PYENV_ROOT:-$HOME/.travis-pyenv}"
+export PYTHON_CONFIGURE_OPTS="--enable-shared"  # NOTE(review): presumably consumed by pyenv's build step - confirm
+PYENV_CACHE_PATH="${PYENV_CACHE_PATH:-$HOME/.pyenv_cache}"  # falls back to ~/.pyenv_cache when unset
+version_cache_path="$PYENV_CACHE_PATH/$PYENV_VERSION"  # cached build of the requested version
+version_pyenv_path="$PYENV_ROOT/versions/$PYENV_VERSION"  # use_cached_python symlinks this to the cached build
+
+# Functions
+#
+# verify_python -- attempts to call the Python command or binary
+# supplied in the first argument with the --version flag. If
+# PYENV_VERSION_STRING is set, then it validates the returned version string
+# as well (using grep -F). Returns whatever status code the command returns.
+verify_python() {
+  local interpreter="$1"  # command or path that is probed with --version
+  shift
+  if [[ -z "$PYENV_VERSION_STRING" ]]; then
+    "$interpreter" --version &>/dev/null  # nothing to compare against: exit status alone decides
+    return
+  fi
+  "$interpreter" --version 2>&1 | grep -F "$PYENV_VERSION_STRING" &>/dev/null
+}
+
+# use_cached_python -- Tries symlinking to the cached PYENV_VERSION and
+# verifying that it's a working build. Returns 0 if it's found and it
+# verifies, otherwise returns 1.
+use_cached_python() {
+  # Guard first: without a cached build there is nothing to link or verify.
+  if [[ ! -d "$version_cache_path" ]]; then
+    echo "No cached python found."
+    return 1
+  fi
+  printf "Cached python found, %s. Verifying..." "$PYENV_VERSION"
+  ln -s "$version_cache_path" "$version_pyenv_path"
+  if verify_python "$version_pyenv_path/bin/python"; then
+    printf "success!\n"
+    return 0
+  fi
+  # Verification failed: remove both the symlink and the cached build.
+  printf "FAILED.\nClearing cached version..."
+  rm -f "$version_pyenv_path"
+  rm -rf "$version_cache_path"
+  printf "done.\n"
+  return 1
+}
+
+# output_debugging_info -- Outputs useful debugging information
+output_debugging_info() {
+ echo "**** Debugging information"
+ printf "PYENV_VERSION\n%s\n" "$PYENV_VERSION"
+ printf "PYENV_VERSION_STRING\n%s\n" "$PYENV_VERSION_STRING"
+ printf "PYENV_CACHE_PATH\n%s\n" "$PYENV_CACHE_PATH"
+ set -x  # trace the commands below so each one is echoed next to its output
+ python --version
+ "$version_cache_path/bin/python" --version  # the cached build itself, addressed by full path
+ which python
+ pyenv which python
+ set +x  # stop tracing
+}
+
+# Main script begins.
+
+if [[ -z "$PYENV_VERSION" ]]; then
+  echo "PYENV_VERSION is not set. Not installing a pyenv."
+  return 0
+fi
+
+# Get out of the virtualenv we're in (if we're in one).
+[[ -z "$VIRTUAL_ENV" ]] || deactivate
+
+# Install pyenv (fetched from its current home, github.com/pyenv/pyenv).
+echo "**** Installing pyenv."
+if [[ -n "$PYENV_RELEASE" ]]; then
+  # Fetch the release archive from Github (slightly faster than cloning)
+  mkdir -p "$PYENV_ROOT"  # -p: do not fail when the directory already exists
+  curl -fsSL "https://github.com/pyenv/pyenv/archive/$PYENV_RELEASE.tar.gz" \
+    | tar -xz -C "$PYENV_ROOT" --strip-components 1
+else
+  # Don't have a release to fetch, so just clone directly
+  git clone --depth 1 https://github.com/pyenv/pyenv.git "$PYENV_ROOT"
+fi
+
+export PATH="$PYENV_ROOT/bin:$PATH"
+eval "$(pyenv init -)"
+
+# Make sure the cache directory exists
+mkdir -p "$PYENV_CACHE_PATH"
+
+# Try using an already cached PYENV_VERSION. If it fails or is not found,
+# then install from scratch.
+echo "**** Trying to find and use cached python $PYENV_VERSION."
+if ! use_cached_python; then
+  echo "**** Installing python $PYENV_VERSION with pyenv now."
+  if pyenv install "$PYENV_VERSION"; then
+    if mv "$version_pyenv_path" "$PYENV_CACHE_PATH"; then  # move the fresh build into the cache directory
+      echo "Python was successfully built and moved to cache."
+      echo "**** Trying to find and use cached python $PYENV_VERSION."
+      if ! use_cached_python; then  # second attempt symlinks the build back from the cache
+        echo "Python version $PYENV_VERSION was apparently successfully built"
+        echo "with pyenv, but, once cached, it could not be verified."
+        output_debugging_info
+        return 1
+      fi
+    else
+      echo "**** Warning: Python was successfully built, but moving to cache"
+      echo "failed. Proceeding anyway without caching."
+    fi
+  else
+    echo "Python version $PYENV_VERSION build FAILED."
+    return 1
+  fi
+fi
+
+# Now we have to reinitialize pyenv, as we need the shims etc to be created so
+# the pyenv activates correctly.
+echo "**** Activating python $PYENV_VERSION and generating new virtualenv."
+eval "$(pyenv init -)"
+pyenv global "$PYENV_VERSION"
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..1488c10c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+# Packaging script: installs the `bin` package and a `q` console command.
+from setuptools import setup
+
+from bin.__version__ import q_version  # version string is defined in bin/__version__.py
+
+setup(
+ name='q',
+ url='https://github.com/harelba/q',
+ license='LICENSE',  # NOTE(review): this field normally holds a license name, not a file name - confirm
+ version=q_version,
+ author='Harel Ben-Attia',
+ description="Run SQL directly on CSV or TSV files",
+ author_email='harelba@gmail.com',
+ install_requires=[
+ 'six==1.11.0'  # exact pin
+ ],
+ packages=[
+ 'bin'
+ ],
+ entry_points={
+ 'console_scripts': [
+ 'q = bin.q:run_standalone'  # the `q` command calls run_standalone() in bin/q.py
+ ]
+ }
+)
diff --git a/test-requirements.txt b/test-requirements.txt
new file mode 100644
index 00000000..a89474ca
--- /dev/null
+++ b/test-requirements.txt
@@ -0,0 +1,2 @@
+pytest==4.6.2
+flake8==3.6.0
\ No newline at end of file
diff --git a/test/test-suite b/test/test-suite
index 26f91cff..02df2add 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -1,12 +1,13 @@
#!/usr/bin/env python
#
+# test suite for q.
+#
+# Prefer end-to-end tests, running the actual q command and testing stdout/stderr, and the return code.
+# Some utilities are provided for making that easy, see other tests for examples.
#
-# Simplistic test suite for q.
-#
-# Currently takes into account the project folder structure for running, so it needs
-# to be executed from the current folder
-#
+# Don't forget to use the Q_EXECUTABLE instead of hardcoding the q command line. This will be used in the near future
+# in order to test the resulting binary executables as well, instead of just executing the q python source code.
#
from __future__ import print_function
@@ -24,21 +25,24 @@ from tempfile import NamedTemporaryFile
import six
from six.moves import range
+
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin'))
-from qtextasdata import QTextAsData, QInputParams
+from q import QTextAsData,QOutput,QOutputPrinter,QInputParams
import itertools
# q uses this encoding as the default output encoding. Some of the tests use it in order to
# make sure that the output is correctly encoded
SYSTEM_ENCODING = locale.getpreferredencoding()
-Q_EXECUTABLE = os.environ.get('Q_EXECUTABLE','../bin/q')
+Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', '../bin/q.py')
+EXAMPLES = os.path.abspath(os.path.join(os.pardir, 'examples'))
DEBUG = 'Q_TEST_DEBUG' in os.environ
if len(sys.argv) > 2 and sys.argv[2] == '-v':
DEBUG = True
+
def run_command(cmd_to_run):
global DEBUG
if DEBUG:
@@ -64,6 +68,7 @@ def run_command(cmd_to_run):
print("RESULT:{}".format(res))
return res
+
uneven_ls_output = six.b("""drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux
drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt
drwxr-xr-x 2 root root 4096 Apr 24 2013 /srv
@@ -75,6 +80,7 @@ lrwxrwxrwx 1 root root 29 Jun 21 2013 /vmlinuz -> boot/vmlinuz-3.8
lrwxrwxrwx 1 root root 32 Jun 21 2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic
""")
+
find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp
8299123 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576
8263229 964 -rw-rw-r-- 1 mapred mapred 984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser
@@ -87,6 +93,7 @@ find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Ma
8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version
""")
+
header_row = six.b('name,value1,value2')
sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')]
sample_data_rows_with_empty_string = [six.b('a,aaa,0'), six.b('b,bbb,0'), six.b('c,,0')]
@@ -96,6 +103,9 @@ sample_data_with_empty_string_no_header = six.b("\n").join(
sample_data_with_header = header_row + six.b("\n") + sample_data_no_header
sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header
+def generate_sample_data_with_header(header):
+ return header + six.b("\n") + sample_data_no_header
+
sample_quoted_data = six.b('''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted
control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline
@@ -135,6 +145,7 @@ long_value1 = "23683289372328372328373"
int_value = "2328372328373"
sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value)
+
def one_column_warning(e):
return e[0].startswith(six.b('Warning: column count is one'))
@@ -159,6 +170,7 @@ class AbstractQTestCase(unittest.TestCase):
path = '/var/tmp'
return '%s/%s-%s.%s' % (path,prefix,random.randint(0,1000000000),postfix)
+
class SaveDbToDiskTests(AbstractQTestCase):
def test_store_to_disk(self):
@@ -171,7 +183,7 @@ class SaveDbToDiskTests(AbstractQTestCase):
self.assertTrue(len(o) == 0)
self.assertTrue(len(e) == 5)
self.assertTrue(e[0].startswith(six.b('Going to save data')))
- self.assertTrue(db_filename.encode(sys.stdout.encoding) in e[0])
+ self.assertTrue(db_filename.encode(sys.stdout.encoding or 'utf-8') in e[0])
self.assertTrue(e[1].startswith(six.b('Data has been loaded in')))
self.assertTrue(e[2].startswith(six.b('Saving data to db file')))
self.assertTrue(e[3].startswith(six.b('Data has been saved into')))
@@ -221,7 +233,7 @@ class BasicTests(AbstractQTestCase):
tmpfile = self.create_file_with_data(
six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00'))
- cmd = '../bin/q -z "select sum(c1),avg(c1) from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -z "select sum(c1),avg(c1) from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertTrue(retcode == 0)
@@ -252,7 +264,7 @@ class BasicTests(AbstractQTestCase):
def test_delimition_mistake_with_header(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d " " "select * from %s" -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -H' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -269,7 +281,7 @@ class BasicTests(AbstractQTestCase):
def test_regexp_int_data_handling(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -283,7 +295,7 @@ class BasicTests(AbstractQTestCase):
def test_regexp_null_data_handling(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -297,7 +309,7 @@ class BasicTests(AbstractQTestCase):
def test_select_one_column(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -311,7 +323,7 @@ class BasicTests(AbstractQTestCase):
def test_tab_delimition_parameter(self):
tmpfile = self.create_file_with_data(
sample_data_no_header.replace(six.b(","), six.b("\t")))
- cmd = '../bin/q -t "select c1,c2,c3 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -t "select c1,c2,c3 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -326,7 +338,7 @@ class BasicTests(AbstractQTestCase):
def test_tab_delimition_parameter__with_manual_override_attempt(self):
tmpfile = self.create_file_with_data(
sample_data_no_header.replace(six.b(","), six.b("\t")))
- cmd = '../bin/q -t -d , "select c1,c2,c3 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -t -d , "select c1,c2,c3 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -340,7 +352,7 @@ class BasicTests(AbstractQTestCase):
def test_output_delimiter(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -355,7 +367,7 @@ class BasicTests(AbstractQTestCase):
def test_output_delimiter_tab_parameter(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , -T "select c1,c2,c3 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , -T "select c1,c2,c3 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -370,7 +382,7 @@ class BasicTests(AbstractQTestCase):
def test_output_delimiter_tab_parameter__with_manual_override_attempt(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -397,7 +409,7 @@ class BasicTests(AbstractQTestCase):
def test_column_separation(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select c1,c2,c3 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1,c2,c3 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -413,7 +425,7 @@ class BasicTests(AbstractQTestCase):
def test_column_analysis(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -427,7 +439,7 @@ class BasicTests(AbstractQTestCase):
def test_column_analysis_no_header(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -438,7 +450,7 @@ class BasicTests(AbstractQTestCase):
def test_header_exception_on_numeric_header_data(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select * from %s" -A -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A -H' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -453,7 +465,7 @@ class BasicTests(AbstractQTestCase):
def test_column_analysis_with_header(self):
tmpfile = self.create_file_with_data(sample_data_with_header)
- cmd = '../bin/q -d , "select c1 from %s" -A -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A -H' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -470,7 +482,7 @@ class BasicTests(AbstractQTestCase):
def test_data_with_header(self):
tmpfile = self.create_file_with_data(sample_data_with_header)
- cmd = '../bin/q -d , "select name from %s" -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -481,7 +493,7 @@ class BasicTests(AbstractQTestCase):
def test_output_header_when_input_header_exists(self):
tmpfile = self.create_file_with_data(sample_data_with_header)
- cmd = '../bin/q -d , "select name from %s" -H -O' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H -O' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -495,7 +507,7 @@ class BasicTests(AbstractQTestCase):
def test_generated_column_name_warning_when_header_line_exists(self):
tmpfile = self.create_file_with_data(sample_data_with_header)
- cmd = '../bin/q -d , "select c3 from %s" -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c3 from %s" -H' % tmpfile.name
retcode, o, e = run_command(cmd)
@@ -503,14 +515,14 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(len(o), 0)
self.assertEqual(len(e), 2)
self.assertTrue(six.b('no such column: c3') in e[0])
- self.assertEqual(
- e[1], six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names'))
+ self.assertTrue(
+ e[1].startswith(six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')))
self.cleanup(tmpfile)
def test_column_analysis_with_unexpected_header(self):
tmpfile = self.create_file_with_data(sample_data_with_header)
- cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -529,7 +541,7 @@ class BasicTests(AbstractQTestCase):
def test_empty_data(self):
tmpfile = self.create_file_with_data(six.b(''))
- cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -542,7 +554,7 @@ class BasicTests(AbstractQTestCase):
def test_empty_data_with_header_param(self):
tmpfile = self.create_file_with_data(six.b(''))
- cmd = '../bin/q -d , "select c1 from %s" -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -H' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -556,7 +568,7 @@ class BasicTests(AbstractQTestCase):
def test_one_row_of_data_without_header_param(self):
tmpfile = self.create_file_with_data(header_row)
- cmd = '../bin/q -d , "select c2 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c2 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -569,7 +581,7 @@ class BasicTests(AbstractQTestCase):
def test_one_row_of_data_with_header_param(self):
tmpfile = self.create_file_with_data(header_row)
- cmd = '../bin/q -d , "select c2 from %s" -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c2 from %s" -H' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -582,7 +594,7 @@ class BasicTests(AbstractQTestCase):
def test_dont_leading_keep_whitespace_in_values(self):
tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
- cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -597,7 +609,7 @@ class BasicTests(AbstractQTestCase):
def test_keep_leading_whitespace_in_values(self):
tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
- cmd = '../bin/q -d , "select c1 from %s" -k' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -k' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -612,7 +624,7 @@ class BasicTests(AbstractQTestCase):
def test_no_impact_of_keeping_leading_whitespace_on_integers(self):
tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
- cmd = '../bin/q -d , "select c2 from %s" -k -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select c2 from %s" -k -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -629,7 +641,7 @@ class BasicTests(AbstractQTestCase):
def test_spaces_in_header_row(self):
tmpfile = self.create_file_with_data(
header_row_with_spaces + six.b("\n") + sample_data_no_header)
- cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select name,\`value 1\` from %s" -H' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -645,7 +657,7 @@ class BasicTests(AbstractQTestCase):
def test_column_analysis_for_spaces_in_header_row(self):
tmpfile = self.create_file_with_data(
header_row_with_spaces + six.b("\n") + sample_data_no_header)
- cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select name,\`value 1\` from %s" -H -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -660,7 +672,7 @@ class BasicTests(AbstractQTestCase):
self.cleanup(tmpfile)
def test_no_query_in_command_line(self):
- cmd = '../bin/q -d , ""'
+ cmd = Q_EXECUTABLE + ' -d , ""'
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 1)
@@ -670,7 +682,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)'))
def test_empty_query_in_command_line(self):
- cmd = '../bin/q -d , " "'
+ cmd = Q_EXECUTABLE + ' -d , " "'
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 1)
@@ -680,7 +692,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)'))
def test_failure_in_query_stops_processing_queries(self):
- cmd = '../bin/q -d , "select 500" "select 300" "wrong-query" "select 8000"'
+ cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300" "wrong-query" "select 8000"'
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 1)
@@ -690,7 +702,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[1],six.b('300'))
def test_multiple_queries_in_command_line(self):
- cmd = '../bin/q -d , "select 500" "select 300+100" "select 300" "select 200"'
+ cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300+100" "select 300" "select 200"'
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -703,7 +715,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[3],six.b('200'))
def test_literal_calculation_query(self):
- cmd = '../bin/q -d , "select 1+40/6"'
+ cmd = Q_EXECUTABLE + ' -d , "select 1+40/6"'
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -713,7 +725,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[0],six.b('7'))
def test_literal_calculation_query_float_result(self):
- cmd = '../bin/q -d , "select 1+40/6.0"'
+ cmd = Q_EXECUTABLE + ' -d , "select 1+40/6.0"'
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -726,7 +738,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name))
- cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -744,7 +756,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None)
- cmd = '../bin/q -d , -q %s -H -Q ascii' % tmp_query_file.name
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q ascii' % tmp_query_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,3)
@@ -760,7 +772,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name),encoding=None)
- cmd = '../bin/q -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -779,7 +791,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None)
- cmd = '../bin/q -d , -q %s -H -Q utf-8' % tmp_query_file.name
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8' % tmp_query_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -797,7 +809,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name))
- cmd = '../bin/q -d , -q %s -H "select * from ppp"' % tmp_query_file.name
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H "select * from ppp"' % tmp_query_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 1)
@@ -814,7 +826,7 @@ class BasicTests(AbstractQTestCase):
tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None)
for target_encoding in ['utf-8','ibm852']:
- cmd = '../bin/q -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding)
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding)
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -832,7 +844,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None)
- cmd = '../bin/q -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 3)
@@ -848,7 +860,7 @@ class BasicTests(AbstractQTestCase):
def test_use_query_file_with_empty_query(self):
tmp_query_file = self.create_file_with_data(six.b(" "))
- cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
+ cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 1)
@@ -860,7 +872,7 @@ class BasicTests(AbstractQTestCase):
self.cleanup(tmp_query_file)
def test_use_non_existent_query_file(self):
- cmd = '../bin/q -d , -q non-existent-query-file -H'
+ cmd = Q_EXECUTABLE + ' -d , -q non-existent-query-file -H'
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 1)
@@ -872,7 +884,7 @@ class BasicTests(AbstractQTestCase):
def test_non_quoted_values_in_quoted_data(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data)
- cmd = '../bin/q -d " " "select c1 from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " "select c1 from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
@@ -890,7 +902,7 @@ class BasicTests(AbstractQTestCase):
def test_regular_quoted_values_in_quoted_data(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data)
- cmd = '../bin/q -d " " "select c2 from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " "select c2 from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -907,7 +919,7 @@ class BasicTests(AbstractQTestCase):
def test_double_double_quoted_values_in_quoted_data(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data)
- cmd = '../bin/q -d " " "select c3 from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " "select c3 from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -924,7 +936,7 @@ class BasicTests(AbstractQTestCase):
def test_escaped_double_quoted_values_in_quoted_data(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data)
- cmd = '../bin/q -d " " "select c4 from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " "select c4 from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -941,7 +953,7 @@ class BasicTests(AbstractQTestCase):
def test_none_input_quoting_mode_in_relaxed_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -m relaxed -D , -w none -W none "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -m relaxed -D , -w none -W none "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -956,7 +968,7 @@ class BasicTests(AbstractQTestCase):
def test_none_input_quoting_mode_in_strict_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -m strict -D , -w none "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -m strict -D , -w none "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode,0)
@@ -970,7 +982,7 @@ class BasicTests(AbstractQTestCase):
def test_minimal_input_quoting_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -D , -w minimal "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D , -w minimal "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -985,7 +997,7 @@ class BasicTests(AbstractQTestCase):
def test_all_input_quoting_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -D , -w all "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D , -w all "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1000,7 +1012,7 @@ class BasicTests(AbstractQTestCase):
def test_incorrect_input_quoting_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -D , -w unknown_wrapping_mode "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D , -w unknown_wrapping_mode "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode,0)
@@ -1015,7 +1027,7 @@ class BasicTests(AbstractQTestCase):
def test_none_output_quoting_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -D , -w all -W none "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W none "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1030,7 +1042,7 @@ class BasicTests(AbstractQTestCase):
def test_minimal_output_quoting_mode__without_need_to_quote_in_output(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -D , -w all -W minimal "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W minimal "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1046,7 +1058,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
# output delimiter is set to space, so the output will contain it
- cmd = '../bin/q -d " " -D " " -w all -W minimal "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D " " -w all -W minimal "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1061,7 +1073,7 @@ class BasicTests(AbstractQTestCase):
def test_nonnumeric_output_quoting_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -D , -w all -W nonnumeric "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W nonnumeric "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1076,7 +1088,7 @@ class BasicTests(AbstractQTestCase):
def test_all_output_quoting_mode(self):
tmp_data_file = self.create_file_with_data(sample_quoted_data2)
- cmd = '../bin/q -d " " -D , -w all -W all "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W all "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1092,7 +1104,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(input_data)
- basic_cmd = '../bin/q -w %s -W %s "select * from -"' % (input_wrapping_mode,output_wrapping_mode)
+ basic_cmd = Q_EXECUTABLE + ' -w %s -W %s "select * from -"' % (input_wrapping_mode,output_wrapping_mode)
chained_cmd = 'cat %s | %s | %s | %s' % (tmp_data_file.name,basic_cmd,basic_cmd,basic_cmd)
retcode, o, e = run_command(chained_cmd)
@@ -1117,7 +1129,7 @@ class BasicTests(AbstractQTestCase):
utf_8_data_with_bom = six.b('\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n')
tmp_data_file = self.create_file_with_data(utf_8_data_with_bom,encoding=None)
- cmd = '../bin/q -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1136,7 +1148,7 @@ class BasicTests(AbstractQTestCase):
data = six.b('111,22.22,"testing text with special characters - citt\xc3\xa0 ",http://somekindofurl.com,12.13.14.15,12.1\n')
tmp_data_file = self.create_file_with_data(data)
- cmd = '../bin/q -d , "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d , "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1145,7 +1157,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[0].decode('utf-8'),u'111,22.22,testing text with special characters - citt\xe0 ,http://somekindofurl.com,12.13.14.15,12.1')
- cmd = '../bin/q -d , "select * from %s" -A' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1166,7 +1178,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_quoted_data)
# FIXME Need to convert \0a to proper encoding suitable for the person running the tests.
- cmd = '../bin/q -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1184,7 +1196,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(sample_quoted_data)
# FIXME Need to convert \0a to proper encoding suitable for the person running the tests.
- cmd = '../bin/q -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1204,7 +1216,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(double_double_quoted_data)
- cmd = '../bin/q -d " " --disable-double-double-quoting "select c2 from %s" -W none' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c2 from %s" -W none' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1214,7 +1226,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[0],six.b('double_double_quoted'))
self.assertEqual(o[1],six.b('this is a quoted value with "double'))
- cmd = '../bin/q -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1224,7 +1236,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[0],six.b(''))
self.assertEqual(o[1],six.b('double'))
- cmd = '../bin/q -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1242,7 +1254,7 @@ class BasicTests(AbstractQTestCase):
tmp_data_file = self.create_file_with_data(escaped_double_quoted_data)
- cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c2 from %s" -W none' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c2 from %s" -W none' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1252,7 +1264,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[0],six.b('escaped_double_quoted'))
self.assertEqual(o[1],six.b('this is a quoted value with \\escaped'))
- cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1262,7 +1274,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(o[0],six.b(''))
self.assertEqual(o[1],six.b('double'))
- cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1279,7 +1291,7 @@ class BasicTests(AbstractQTestCase):
# these flags will be removed completely in the future
tmp_data_file = self.create_file_with_data(combined_quoted_data)
- cmd = '../bin/q -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1288,7 +1300,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(len(o),7) # found 7 fields
- cmd = '../bin/q -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1297,7 +1309,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(len(o),5) # found 5 fields
- cmd = '../bin/q -d " " --disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1306,7 +1318,7 @@ class BasicTests(AbstractQTestCase):
self.assertEqual(len(o),5) # found 5 fields
- cmd = '../bin/q -d " " "select * from %s" -A' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -A' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
@@ -1318,7 +1330,7 @@ class BasicTests(AbstractQTestCase):
self.cleanup(tmp_data_file)
def test_nonexistent_file(self):
- cmd = '../bin/q "select * from non-existent-file"'
+ cmd = Q_EXECUTABLE + ' "select * from non-existent-file"'
retcode, o, e = run_command(cmd)
@@ -1335,7 +1347,7 @@ class BasicTests(AbstractQTestCase):
tmpfile = self.create_file_with_data(file_data)
- cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1353,7 +1365,7 @@ class BasicTests(AbstractQTestCase):
tmpfile = self.create_file_with_data(file_data)
- cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 31)
@@ -1370,7 +1382,7 @@ class BasicTests(AbstractQTestCase):
file_data = six.b("a,b,c\nvery-long-text,2,3\n")
tmpfile = self.create_file_with_data(file_data)
- cmd = '../bin/q -H -d , -M 3 "select a from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -H -d , -M 3 "select a from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 31)
@@ -1381,7 +1393,7 @@ class BasicTests(AbstractQTestCase):
self.assertTrue((six.b("Offending file is '%s'" % tmpfile.name)) in e[0])
self.assertTrue(six.b('Line is 2') in e[0])
- cmd2 = '../bin/q -H -d , -M 300 -H "select a from %s"' % tmpfile.name
+ cmd2 = Q_EXECUTABLE + ' -H -d , -M 300 -H "select a from %s"' % tmpfile.name
retcode2, o2, e2 = run_command(cmd2)
self.assertEqual(retcode2, 0)
@@ -1396,7 +1408,7 @@ class BasicTests(AbstractQTestCase):
file_data = six.b("a,b,c\nvery-long-text,2,3\n")
tmpfile = self.create_file_with_data(file_data)
- cmd = '../bin/q -H -d , -M 0 "select a from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -H -d , -M 0 "select a from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 31)
@@ -1412,7 +1424,7 @@ class BasicTests(AbstractQTestCase):
file_data = six.b("a,b,a\n10,20,30\n30,40,50")
tmpfile = self.create_file_with_data(file_data)
- cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 35)
@@ -1425,11 +1437,72 @@ class BasicTests(AbstractQTestCase):
self.cleanup(tmpfile)
+class MultiHeaderTests(AbstractQTestCase):
+    # Tests for -H -O queries that read several files joined with '+', where
+    # every file carries its own header row.
+
+    def test_output_header_when_multiple_input_headers_exist(self):
+        # All files share one header: the output must show it once, then the data rows.
+        TMPFILE_COUNT = 5
+        tmpfiles = [self.create_file_with_data(sample_data_with_header) for _ in range(TMPFILE_COUNT)]
+
+        tmpfilenames = "+".join(tmpfile.name for tmpfile in tmpfiles)
+
+        cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s order by name" -H -O' % tmpfilenames
+        retcode, o, e = run_command(cmd)
+
+        self.assertEqual(retcode, 0)
+        self.assertEqual(len(o), TMPFILE_COUNT*3+1)
+        self.assertEqual(o[0], six.b("name,value1,value2"))
+
+        # With "order by name", the TMPFILE_COUNT copies of each data row are expected back to back
+        for row_index in range(3):
+            for i in range(TMPFILE_COUNT):
+                self.assertEqual(o[TMPFILE_COUNT*row_index+1+i], sample_data_rows[row_index])
+
+        # No header text may appear among the data lines
+        for oi in o[1:]:
+            self.assertTrue(six.b('name') not in oi)
+
+        for tmpfile in tmpfiles:
+            self.cleanup(tmpfile)
+
+    def test_output_header_when_extra_header_column_names_are_different(self):
+        # The second file names its first column differently: the test expects exit code 35.
+        tmpfile1 = self.create_file_with_data(sample_data_with_header)
+        tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('othername,value1,value2')))
+
+        cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name)
+        retcode, o, e = run_command(cmd)
+
+        self.assertEqual(retcode, 35)
+        self.assertEqual(len(o), 0)
+        self.assertEqual(len(e), 1)
+        self.assertTrue(e[0].startswith(six.b("Bad header row:")))
+
+        self.cleanup(tmpfile1)
+        self.cleanup(tmpfile2)
+
+    def test_output_header_when_extra_header_has_different_number_of_columns(self):
+        # The second file's header has two columns instead of three: the test expects exit code 35.
+        tmpfile1 = self.create_file_with_data(sample_data_with_header)
+        tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('name,value1')))
+
+        cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name)
+        retcode, o, e = run_command(cmd)
+
+        self.assertEqual(retcode, 35)
+
+        # Clean up the temp files like the other tests in this class do
+        self.cleanup(tmpfile1)
+        self.cleanup(tmpfile2)
+
+
class ParsingModeTests(AbstractQTestCase):
def test_strict_mode_column_count_mismatch_error(self):
tmpfile = self.create_file_with_data(uneven_ls_output)
- cmd = '../bin/q -m strict "select count(*) from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m strict "select count(*) from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -1442,7 +1506,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_strict_mode_too_large_specific_column_count(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -1456,7 +1520,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_strict_mode_too_small_specific_column_count(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , -m strict -c 2 "select count(*) from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , -m strict -c 2 "select count(*) from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -1471,7 +1535,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_relaxed_mode_missing_columns_in_header(self):
tmpfile = self.create_file_with_data(
sample_data_with_missing_header_names)
- cmd = '../bin/q -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1488,7 +1552,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_strict_mode_missing_columns_in_header(self):
tmpfile = self.create_file_with_data(
sample_data_with_missing_header_names)
- cmd = '../bin/q -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -1502,7 +1566,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_output_delimiter_with_missing_fields(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select * from %s" -D ";"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select * from %s" -D ";"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1517,7 +1581,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_handling_of_null_integers(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select avg(c2) from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select avg(c2) from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1530,7 +1594,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_empty_integer_values_converted_to_null(self):
tmpfile = self.create_file_with_data(sample_data_no_header)
- cmd = '../bin/q -d , "select * from %s where c2 is null"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -d , "select * from %s where c2 is null"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1544,7 +1608,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_empty_string_values_not_converted_to_null(self):
tmpfile = self.create_file_with_data(
sample_data_with_empty_string_no_header)
- cmd = '../bin/q -d , "select * from %s where c2 == %s"' % (
+ cmd = Q_EXECUTABLE + ' -d , "select * from %s where c2 == %s"' % (
tmpfile.name, "''")
retcode, o, e = run_command(cmd)
@@ -1558,7 +1622,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_relaxed_mode_detected_columns(self):
tmpfile = self.create_file_with_data(uneven_ls_output)
- cmd = '../bin/q -m relaxed "select count(*) from %s" -A' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1582,7 +1646,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_relaxed_mode_detected_columns_with_specific_column_count(self):
tmpfile = self.create_file_with_data(uneven_ls_output)
- cmd = '../bin/q -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1606,7 +1670,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_relaxed_mode_last_column_data_with_specific_column_count(self):
tmpfile = self.create_file_with_data(uneven_ls_output)
- cmd = '../bin/q -m relaxed "select c9 from %s" -c 9' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m relaxed "select c9 from %s" -c 9' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1622,7 +1686,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_1_column_warning_in_relaxed_mode(self):
tmpfile = self.create_file_with_data(one_column_data)
- cmd = '../bin/q -m relaxed "select c1 from %s" -d ,' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d ,' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1637,7 +1701,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_1_column_warning_in_strict_mode(self):
tmpfile = self.create_file_with_data(one_column_data)
- cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m strict' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m strict' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1653,7 +1717,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_1_column_warning_suppression_in_relaxed_mode_when_column_count_is_specific(self):
tmpfile = self.create_file_with_data(one_column_data)
- cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m relaxed -c 1' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m relaxed -c 1' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1667,7 +1731,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_1_column_warning_suppression_in_strict_mode_when_column_count_is_specific(self):
tmpfile = self.create_file_with_data(one_column_data)
- cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m strict -c 1' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m strict -c 1' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1681,7 +1745,7 @@ class ParsingModeTests(AbstractQTestCase):
def test_fluffy_mode(self):
tmpfile = self.create_file_with_data(uneven_ls_output)
- cmd = '../bin/q -m fluffy "select c9 from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m fluffy "select c9 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1701,7 +1765,7 @@ class ParsingModeTests(AbstractQTestCase):
data_list[950] = six.b("column1 column2 column3 column4 column5")
tmpfile = self.create_file_with_data(six.b("\n").join(data_list))
- cmd = '../bin/q -m fluffy "select * from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m fluffy "select * from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode,0)
@@ -1718,7 +1782,7 @@ class ParsingModeTests(AbstractQTestCase):
data_list[750] = six.b("column1 column3 column4")
tmpfile = self.create_file_with_data(six.b("\n").join(data_list))
- cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode,0)
@@ -1735,7 +1799,7 @@ class ParsingModeTests(AbstractQTestCase):
data_list[750] = six.b("column1 column2 column3 column4 column5")
tmpfile = self.create_file_with_data(six.b("\n").join(data_list))
- cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode,0)
@@ -1747,7 +1811,6 @@ class ParsingModeTests(AbstractQTestCase):
self.cleanup(tmpfile)
-
class FormattingTests(AbstractQTestCase):
def test_column_formatting(self):
@@ -1782,7 +1845,7 @@ class FormattingTests(AbstractQTestCase):
data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a')
tmp_data_file = self.create_file_with_data(data)
- cmd = '../bin/q -d , -H "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertNotEqual(retcode, 0)
@@ -1807,7 +1870,7 @@ class FormattingTests(AbstractQTestCase):
data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a')
tmp_data_file = self.create_file_with_data(data)
- cmd = '../bin/q -d , -H "select * from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1840,14 +1903,14 @@ class FormattingTests(AbstractQTestCase):
data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a')
tmp_data_file = self.create_file_with_data(data)
- cmd = '../bin/q -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name
+ cmd = Q_EXECUTABLE + ' -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode,0)
- if len(e) == 2:
+ if len(e) == 2 or len(e) == 1:
# In python 3.7, there's a deprecation warning for the 'U' file opening mode, which is ok for now
- self.assertEqual(len(e), 2)
+ self.assertIn(len(e), [1,2])
self.assertTrue(b"DeprecationWarning: 'U' mode is deprecated" in e[0])
elif len(e) != 0:
# Nothing should be output to stderr in other versions
@@ -1866,7 +1929,7 @@ class SqlTests(AbstractQTestCase):
def test_find_example(self):
tmpfile = self.create_file_with_data(find_output)
- cmd = '../bin/q "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name
+ cmd = Q_EXECUTABLE + ' "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1880,7 +1943,7 @@ class SqlTests(AbstractQTestCase):
self.cleanup(tmpfile)
def test_join_example(self):
- cmd = '../bin/q "select myfiles.c8,emails.c2 from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'
+ cmd = Q_EXECUTABLE + ' "select myfiles.c8,emails.c2 from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES)
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1890,7 +1953,7 @@ class SqlTests(AbstractQTestCase):
self.assertEqual(o[1], six.b('ppp dip.2@otherdomain.com'))
def test_join_example_with_output_header(self):
- cmd = '../bin/q -O "select myfiles.c8 aaa,emails.c2 bbb from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'
+ cmd = Q_EXECUTABLE + ' -O "select myfiles.c8 aaa,emails.c2 bbb from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES)
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1902,7 +1965,7 @@ class SqlTests(AbstractQTestCase):
def test_self_join1(self):
tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)]))
- cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name)
+ cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name)
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1913,7 +1976,7 @@ class SqlTests(AbstractQTestCase):
def test_self_join_reuses_table(self):
tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)]))
- cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name)
+ cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name)
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1928,7 +1991,7 @@ class SqlTests(AbstractQTestCase):
def test_self_join2(self):
tmpfile1 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)]))
- cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name,tmpfile1.name)
+ cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name,tmpfile1.name)
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1938,7 +2001,7 @@ class SqlTests(AbstractQTestCase):
self.cleanup(tmpfile1)
tmpfile2 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)]))
- cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name)
+ cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name)
retcode, o, e = run_command(cmd)
self.assertEqual(retcode, 0)
@@ -1956,7 +2019,7 @@ class SqlTests(AbstractQTestCase):
'''))
# Check original column type detection
- cmd = '../bin/q -A -d , -H "select * from %s"' % (tmpfile.name)
+ cmd = Q_EXECUTABLE + ' -A -d , -H "select * from %s"' % (tmpfile.name)
retcode, o, e = run_command(cmd)
@@ -1972,7 +2035,7 @@ class SqlTests(AbstractQTestCase):
self.assertEqual(o[4],six.b(' `float_number` - float'))
# Check column types detected when actual detection is disabled
- cmd = '../bin/q -A -d , -H --as-text "select * from %s"' % (tmpfile.name)
+ cmd = Q_EXECUTABLE + ' -A -d , -H --as-text "select * from %s"' % (tmpfile.name)
retcode, o, e = run_command(cmd)
@@ -1987,7 +2050,7 @@ class SqlTests(AbstractQTestCase):
self.assertEqual(o[4],six.b(' `float_number` - text'))
# Get actual data with regular detection
- cmd = '../bin/q -d , -H "select * from %s"' % (tmpfile.name)
+ cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % (tmpfile.name)
retcode, o, e = run_command(cmd)
@@ -2001,7 +2064,7 @@ class SqlTests(AbstractQTestCase):
self.assertEqual(o[3],six.b("regular text 4,-123,-123,122.2"))
# Get actual data without detection
- cmd = '../bin/q -d , -H --as-text "select * from %s"' % (tmpfile.name)
+ cmd = Q_EXECUTABLE + ' -d , -H --as-text "select * from %s"' % (tmpfile.name)
retcode, o, e = run_command(cmd)