diff --git a/README.md b/README.md index cb98568..d0abda7 100644 --- a/README.md +++ b/README.md @@ -34,12 +34,21 @@ To run: docker run -it tiledb:release +### Building with HDFS Enabled + +TileDB at compilation does not require libhdfs or any JVM component except +for unit tests. At runtime TileDB instead dynamically loads (`dlopen`) the needed `libhdfs`. +As a result, if you want HDFS support you must use a separate Docker image +which includes the entire HDFS runtime. + + docker build --build-arg enable=hdfs -t tiledb:release-hdfs release-hdfs + ### Optional components If you'd like to build TileDB with HDFS, use the `enable` build argument when building the images, e.g.: - docker build --build-arg enable=hdfs -t tiledb:release + docker build --build-arg enable=hdfs -t tiledb:release release ## TileDB-R diff --git a/base/Dockerfile b/base/Dockerfile index 0080116..7ca8287 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -4,7 +4,7 @@ # docker build -t tiledb:base -# Ubuntu Trusty -FROM ubuntu:trusty +# Ubuntu Bionic +FROM ubuntu:bionic # Setup home environment RUN useradd tiledb @@ -17,18 +17,10 @@ RUN apt-get update && apt-get install -y \ unzip \ git \ cmake \ - python3.5 \ - python3.5-dev \ + python3 \ + python3-pip \ + python3-dev \ libssl-dev \ && apt-get clean \ && apt-get purge -y \ - && rm -rf /bar/lib/apt/lists* \ - && update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.5 1 - -RUN cd /tmp \ - && wget https://cmake.org/files/v3.3/cmake-3.3.2-Linux-x86_64.tar.gz \ - && tar -xzf cmake-3.3.2-Linux-x86_64.tar.gz \ - && cp -R cmake-3.3.2-Linux-x86_64/bin /usr/ \ - && cp -R cmake-3.3.2-Linux-x86_64/doc /usr/ \ - && cp -R cmake-3.3.2-Linux-x86_64/man /usr/ \ - && cp -R cmake-3.3.2-Linux-x86_64/share /usr/ + && rm -rf /var/lib/apt/lists/* diff --git a/release-hdfs/Dockerfile b/release-hdfs/Dockerfile new file mode 100644 index 0000000..0dab912 --- /dev/null +++ b/release-hdfs/Dockerfile @@ -0,0 +1,74 @@ +# Build and install the latest TileDB 
stable release + +# To build: +# docker build -t tiledb:release-hdfs release-hdfs +# +# Use the build arg 'enable' to configure optional TileDB components, e.g.: +# docker build --build-arg enable=s3,hdfs -t tiledb:release-hdfs release-hdfs + +FROM tiledb:base + +# Optional components to enable (defaults to empty). +ARG enable +# Release version number of TileDB to install. +ARG version=1.7.4 +# Release version number of TileDB-Py to install. +# -- see below -- + +COPY install-hadoop.sh /tmp/install-hadoop.sh +RUN /tmp/install-hadoop.sh +# Install HDFS libs (disabled; install-hadoop.sh above now performs these steps) +#RUN apt-get update && apt-get install -y \ +# software-properties-common \ +# openjdk-8-jre \ +# && apt-get clean \ +# && apt-get purge -y \ +# && rm -rf /bar/lib/apt/lists* \ +# && update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.5 1 +# +#RUN mkdir -p /usr/local/hadoop/ \ +# && chown -R $(whoami) /usr/local/hadoop \ +# && pushd /usr/local/hadoop \ +# # download from closest mirror +# && curl -G -L -d "action=download" -d "filename=hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" \ +# https://www.apache.org/dyn/mirrors/mirrors.cgi -o hadoop-${HADOOP_VERSION}.tar.gz \ +# && tar xzf hadoop-${HADOOP_VERSION}.tar.gz \ +# && rm -rf ./home/hadoop-${HADOOP_VERSION} \ +# && mv hadoop-${HADOOP_VERSION} home \ +# && chown -R $(whoami) /usr/local/hadoop \ +# && popd + +# Install TileDB +RUN wget -P /home/tiledb https://github.com/TileDB-Inc/TileDB/archive/${version}.tar.gz \ + && tar xzf /home/tiledb/${version}.tar.gz -C /home/tiledb \ + && rm /home/tiledb/${version}.tar.gz \ + && cd /home/tiledb/TileDB-${version} \ + && mkdir build \ + && cd build \ + && ../bootstrap --prefix=/usr/local --enable-s3 --enable-serialization --enable-hdfs --enable=${enable} \ + && make -j$(nproc) \ + && make -j$(nproc) examples \ + && make install-tiledb \ + && rm -rf /home/tiledb/TileDB-${version} + +# Release version number of TileDB-Py to install. 
+ARG pyversion=0.5.5 +ENV pyversion=$pyversion SETUPTOOLS_SCM_PRETEND_VERSION=$pyversion + +# ----------------------------------------------------------------------------- + +# Install Python bindings +RUN wget https://github.com/TileDB-Inc/TileDB-Py/archive/${pyversion}.tar.gz -O /home/tiledb/Py-${pyversion}.tar.gz \ + && tar xzf /home/tiledb/Py-${pyversion}.tar.gz -C /home/tiledb \ + && rm /home/tiledb/Py-${pyversion}.tar.gz \ + && cd /home/tiledb/TileDB-Py-${pyversion} \ + && pip3 install -r requirements.txt \ + && python3 setup.py install --tiledb=/usr/local \ + && rm -rf /home/tiledb/TileDB-Py-${pyversion} + +EXPOSE 22 + +ENV LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH" + +WORKDIR /home/tiledb +ENTRYPOINT ["/bin/bash"] diff --git a/release-hdfs/install-hadoop.sh b/release-hdfs/install-hadoop.sh new file mode 100755 index 0000000..d364eca --- /dev/null +++ b/release-hdfs/install-hadoop.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +# +# The MIT License (MIT) +# +# Copyright (c) 2019-2020 TileDB, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +# Installs and configures HDFS. +set -x + +HADOOP_VERSION="3.1.3" + +die() { + echo "$@" 1>&2 ; popd 2>/dev/null; exit 1 +} + + +function update_apt_repo { + # Refresh the package index before installing; do not rely on lists cached in the base image. + apt-get update -y && + apt-get install -y software-properties-common wget curl +} + +function install_java { + apt-get install -y openjdk-8-jre +} + +function install_hadoop { + mkdir -p /usr/local/hadoop/ && + chown -R $(whoami) /usr/local/hadoop || die "could not create local hadoop directory" + pushd /usr/local/hadoop + # download from closest mirror; -f makes curl exit non-zero on an HTTP error so the check below sees it + curl -f -G -L -d "action=download" -d "filename=hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" \ + https://www.apache.org/dyn/mirrors/mirrors.cgi -o hadoop-${HADOOP_VERSION}.tar.gz + if [ $? -ne 0 ]; then + die "error downloading hadoop from apache mirror" + fi; + tar xzf hadoop-${HADOOP_VERSION}.tar.gz || die "error extracting hadoop download" + if [ -d ./home/hadoop-${HADOOP_VERSION} ]; then + rm -rf ./home/hadoop-${HADOOP_VERSION} + fi + mv hadoop-${HADOOP_VERSION} home && chown -R $(whoami) /usr/local/hadoop + popd +} + +function create_hadoop_user { + # Create the hduser and hadoop accounts with a home directory (-m) and a + # bash login shell (-s), then set each password non-interactively via passwd. + # NOTE(review): the passwords are hard-coded and baked into the image; + # acceptable only for a throwaway local test container. 
+ useradd -m -s /bin/bash hduser && + echo -e "hduser123\nhduser123\n" | passwd hduser || return 1 + + useradd -m -s /bin/bash hadoop && + echo -e "hadoop123\nhadoop123\n" | passwd hadoop +} + +function setup_core_xml { + export HADOOP_HOME=/usr/local/hadoop/home + local tmpfile=/tmp/hadoop_fafsa.xml + local file=$HADOOP_HOME/etc/hadoop/core-site.xml + rm -rf $file + cat >> $tmpfile < + + + +hadoop.tmp.dir +/tmp/hadooop +Temporary directories. 
+ + +fs.default.name +hdfs://localhost:9000 + + +EOF + tmpfile=/tmp/hadoop_fafsa.xml + mv $tmpfile $file +} + +function setup_mapred_xml { + export HADOOP_HOME=/usr/local/hadoop/home + local tmpfile=/tmp/hadoop_mapred.xml + local file=$HADOOP_HOME/etc/hadoop/mapred-site.xml + rm -rf $file + cat >> $tmpfile < + + + +mapred.job.tracker +localhost:9010 +The tracker of MapReduce + + +EOT + tmpfile=/tmp/hadoop_mapred.xml + mv $tmpfile $file +} + +function setup_hdfs_xml { + export HADOOP_HOME=/usr/local/hadoop/home + local tmpfile=/tmp/hadoop_hdfs.xml + local file=$HADOOP_HOME/etc/hadoop/hdfs-site.xml + rm -rf $file + cat >> $tmpfile < + + + + +dfs.replication +1 + + + + + +dfs.default.replica +1 + + + +output.replace-datanode-on-failure +false + + + +dfs.client.read.shortcircuit +false + + + +rpc.client.connect.retry +10 + + + +rpc.client.read.timeout +3600000 + + + +rpc.client.write.timeout +3600000 + + + +EOF + tmpfile=/tmp/hadoop_hdfs.xml + mv $tmpfile $file +} + + +function setup_environment { + export HADOOP_HOME=/usr/local/hadoop/home + sed -i -- 's/JAVA_HOME=\${JAVA_HOME}/JAVA_HOME=\$(readlink -f \/usr\/bin\/java | sed "s:bin\/java::")/' \ + $HADOOP_HOME/etc/hadoop/hadoop-env.sh + setup_core_xml && + setup_mapred_xml && + setup_hdfs_xml || die "error in generating xml configuration files" +} + +function passwordless_ssh { + if [ -d ~/.ssh ]; then + rm -rf ~/.ssh + fi + apt-get --reinstall install -y openssh-server openssh-client || die "error (re)installing openssh" + mkdir ~/.ssh + ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa + cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys + ssh-keyscan -H localhost >> ~/.ssh/known_hosts + ssh-keyscan -H 127.0.0.1 >> ~/.ssh/known_hosts + ssh-keyscan -H 0.0.0.0 >> ~/.ssh/known_hosts + service ssh restart || die "error restarting ssh service" +} + +function run { + update_apt_repo || die "error updating apt-repo" + install_java || die "error installing java" + create_hadoop_user || die "error creating hadoop user" + install_hadoop 
|| die "error installing hadoop" + setup_environment || die "error setting up environment" + passwordless_ssh || die "error setting up passwordless ssh" +} + +run +# Pause briefly after run: passwordless_ssh ends with `service ssh restart`, and this gives the restart time to finish before the script exits. +sleep 2 diff --git a/release/Dockerfile b/release/Dockerfile index 1bb2f4b..cdb7423 100644 --- a/release/Dockerfile +++ b/release/Dockerfile @@ -35,20 +35,16 @@ ENV pyversion=$pyversion SETUPTOOLS_SCM_PRETEND_VERSION=$pyversion # ----------------------------------------------------------------------------- # Install Python bindings -RUN wget -P /tmp https://bootstrap.pypa.io/get-pip.py \ - && python3 /tmp/get-pip.py \ - && rm /tmp/get-pip.py \ - && wget https://github.com/TileDB-Inc/TileDB-Py/archive/${pyversion}.tar.gz -O /home/tiledb/Py-${pyversion}.tar.gz \ +RUN wget https://github.com/TileDB-Inc/TileDB-Py/archive/${pyversion}.tar.gz -O /home/tiledb/Py-${pyversion}.tar.gz \ && tar xzf /home/tiledb/Py-${pyversion}.tar.gz -C /home/tiledb \ && rm /home/tiledb/Py-${pyversion}.tar.gz \ && cd /home/tiledb/TileDB-Py-${pyversion} \ - && pip install -r requirements.txt \ - && python3.5 setup.py install --tiledb=/usr/local \ + && pip3 install -r requirements.txt \ + && python3 setup.py install --tiledb=/usr/local \ && rm -rf /home/tiledb/TileDB-Py-${pyversion} EXPOSE 22 -# this can be removed for TileDB-Py 0.4.3 ENV LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH" WORKDIR /home/tiledb