Skip to content
This repository has been archived by the owner on Sep 9, 2024. It is now read-only.

Add HDFS enabled release images #39

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,21 @@ To run:

docker run -it tiledb:release

### Building with HDFS Enabled

TileDB does not require libhdfs or any JVM component at compilation time,
except for the unit tests. Instead, TileDB dynamically loads (`dlopen`) the
needed `libhdfs` at runtime. As a result, if you want HDFS support you must
use a separate Docker image, which includes the entire HDFS runtime:

docker build --build-arg enable=hdfs -t tiledb:release-hdfs release-hdfs

### Optional components

If you'd like to build TileDB with HDFS, use the `enable` build argument
when building the images, e.g.:

docker build --build-arg enable=hdfs -t tiledb:release
docker build --build-arg enable=hdfs -t tiledb:release release

## TileDB-R

Expand Down
19 changes: 6 additions & 13 deletions base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# docker build -t tiledb:base

# Ubuntu Trusty
FROM ubuntu:trusty
FROM ubuntu:bionic

# Setup home environment
RUN useradd tiledb
Expand All @@ -17,18 +17,11 @@ RUN apt-get update && apt-get install -y \
unzip \
git \
cmake \
python3.5 \
python3.5-dev \
python3 \
python3-pip \
python3-dev \
libssl-dev \
cmake \
&& apt-get clean \
&& apt-get purge -y \
&& rm -rf /bar/lib/apt/lists* \
&& update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.5 1

RUN cd /tmp \
&& wget https://cmake.org/files/v3.3/cmake-3.3.2-Linux-x86_64.tar.gz \
&& tar -xzf cmake-3.3.2-Linux-x86_64.tar.gz \
&& cp -R cmake-3.3.2-Linux-x86_64/bin /usr/ \
&& cp -R cmake-3.3.2-Linux-x86_64/doc /usr/ \
&& cp -R cmake-3.3.2-Linux-x86_64/man /usr/ \
&& cp -R cmake-3.3.2-Linux-x86_64/share /usr/
&& rm -rf /bar/lib/apt/lists*
74 changes: 74 additions & 0 deletions release-hdfs/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Build and install the latest TileDB stable release, on top of an image that
# also contains a Hadoop/HDFS installation (set up by install-hadoop.sh).

# To build (the last argument is the build context directory holding this
# Dockerfile and install-hadoop.sh):
# docker build -t tiledb:release-hdfs release-hdfs
#
# Use the build arg 'enable' to configure optional TileDB components, e.g.:
# docker build --build-arg enable=s3,hdfs -t tiledb:release-hdfs release-hdfs

# tiledb:base is the image produced from base/Dockerfile
# (docker build -t tiledb:base).
FROM tiledb:base

# Optional components to enable (defaults to empty). The value is passed to
# TileDB's bootstrap script as --enable=<value>.
ARG enable
# Release version number of TileDB to install.
ARG version=1.7.4
# The TileDB-Py release version (ARG pyversion) is declared further down,
# immediately before the Python bindings are installed.

# Install Hadoop/HDFS by running the helper script shipped next to this
# Dockerfile. COPY is used instead of ADD because this is a plain local file:
# ADD's extra behaviours (remote URLs, automatic tar extraction) are not wanted.
# The script must carry its executable bit in the build context, since it is
# invoked directly.
COPY install-hadoop.sh /tmp/install-hadoop.sh
RUN /tmp/install-hadoop.sh
# Install HDFS libs
#RUN apt-get update && apt-get install -y \
# software-properties-common \
# openjdk-8-jre \
# && apt-get clean \
# && apt-get purge -y \
# && rm -rf /bar/lib/apt/lists* \
# && update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.5 1
#
#RUN mkdir -p /usr/local/hadoop/ \
# && chown -R $(whoami) /usr/local/hadoop \
# && pushd /usr/local/hadoop \
# # download from closest mirror
# && curl -G -L -d "action=download" -d "filename=hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" \
# https://www.apache.org/dyn/mirrors/mirrors.cgi -o hadoop-${HADOOP_VERSION}.tar.gz \
# && tar xzf hadoop-${HADOOP_VERSION}.tar.gz \
# && rm -rf ./home/hadoop-${HADOOP_VERSION} \
# && mv hadoop-${HADOOP_VERSION} home \
# && chown -R $(whoami) /usr/local/hadoop \
# && popd

# Install TileDB: fetch the release tarball for ${version}, build it out of
# tree with S3, serialization and HDFS support (plus anything requested through
# the 'enable' build arg), install it under /usr/local, then delete the sources.
# Everything happens in one RUN so the sources never persist in a layer.
RUN tarball="/home/tiledb/${version}.tar.gz" \
    && srcdir="/home/tiledb/TileDB-${version}" \
    && wget -P /home/tiledb https://github.com/TileDB-Inc/TileDB/archive/${version}.tar.gz \
    && tar xzf "${tarball}" -C /home/tiledb \
    && rm "${tarball}" \
    && mkdir "${srcdir}/build" \
    && cd "${srcdir}/build" \
    && ../bootstrap --prefix=/usr/local --enable-s3 --enable-serialization --enable-hdfs --enable=${enable} \
    && make -j$(nproc) \
    && make -j$(nproc) examples \
    && make install-tiledb \
    && rm -rf "${srcdir}"

# Release version number of TileDB-Py to install.
ARG pyversion=0.5.5
# Expose the version to the build (and the running container).
# SETUPTOOLS_SCM_PRETEND_VERSION is the setuptools_scm override for the package
# version; presumably required because the GitHub source archive carries no
# git metadata to derive it from -- TODO confirm.
ENV pyversion=$pyversion SETUPTOOLS_SCM_PRETEND_VERSION=$pyversion

# -----------------------------------------------------------------------------

# Install Python bindings: download the TileDB-Py ${pyversion} sources, install
# their requirements, build/install against the TileDB in /usr/local, and
# remove the sources again within the same layer.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN wget https://github.com/TileDB-Inc/TileDB-Py/archive/${pyversion}.tar.gz -O /home/tiledb/Py-${pyversion}.tar.gz \
    && tar xzf /home/tiledb/Py-${pyversion}.tar.gz -C /home/tiledb \
    && rm /home/tiledb/Py-${pyversion}.tar.gz \
    && cd /home/tiledb/TileDB-Py-${pyversion} \
    && pip3 install --no-cache-dir -r requirements.txt \
    && python3 setup.py install --tiledb=/usr/local \
    && rm -rf /home/tiledb/TileDB-Py-${pyversion}

# Documents that port 22 is used by the image; EXPOSE itself does not publish
# the port.
EXPOSE 22

# Let the dynamic loader find the libraries installed under /usr/local/lib.
ENV LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"

WORKDIR /home/tiledb
# Exec (JSON-array) form: bash is started directly instead of being wrapped in
# `/bin/sh -c`, so it receives signals itself and arguments given to
# `docker run` are passed on to it. `docker run -it <image>` still opens an
# interactive shell.
ENTRYPOINT ["/bin/bash"]
209 changes: 209 additions & 0 deletions release-hdfs/install-hadoop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#!/bin/bash

#
# The MIT License (MIT)
#
# Copyright (c) 2019-2020 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#

# Installs and configures HDFS.
# Trace every executed command so the build log shows what the script did.
set -x

# Hadoop release to download and install. Defaults to 3.1.3; a different
# release can be selected by setting HADOOP_VERSION in the environment before
# running the script.
HADOOP_VERSION="${HADOOP_VERSION:-3.1.3}"

# Abort the script: print all arguments as one message on stderr, undo one
# pushd level if there is one (popd's own error output is discarded), and exit
# with status 1.
die() {
    echo "$@" >&2
    popd 2>/dev/null
    exit 1
}


# Refresh the apt package index and install the helper tools used by this
# script (software-properties-common, wget, curl).
#
# The index is refreshed BEFORE anything is installed so the installs resolve
# against current metadata, and the two steps are chained so that a failure of
# either one is returned to the caller instead of being masked by a later
# command.
function update_apt_repo {
    apt-get update -y &&
        apt-get install -y software-properties-common wget curl
}

# Install the OpenJDK 8 JRE package through apt; returns apt-get's exit status.
function install_java {
    local -a packages=(openjdk-8-jre)
    apt-get install -y "${packages[@]}"
}

# Download the Hadoop release selected by HADOOP_VERSION from an Apache mirror
# and unpack it so that the installation lives in /usr/local/hadoop/home.
#
# Reads:   HADOOP_VERSION (global).
# Returns: popd's status on success; any failing step aborts the whole script
#          through die() (exit status 1).
function install_hadoop {
    local tarball="hadoop-${HADOOP_VERSION}.tar.gz"

    mkdir -p /usr/local/hadoop/ &&
        chown -R "$(whoami)" /usr/local/hadoop || die "could not create local hadoop directory"
    # Do not continue in the wrong directory if the pushd fails.
    pushd /usr/local/hadoop || die "could not enter local hadoop directory"

    # download from closest mirror
    # --fail turns HTTP errors (e.g. 404) into a non-zero exit status instead
    # of silently saving the server's error page as the tarball.
    curl --fail -G -L -d "action=download" \
        -d "filename=hadoop/common/hadoop-${HADOOP_VERSION}/${tarball}" \
        https://www.apache.org/dyn/mirrors/mirrors.cgi -o "${tarball}" ||
        die "error downloading hadoop from apache mirror"

    tar xzf "${tarball}" || die "error extracting hadoop download"
    # The archive is not needed once extracted; dropping it here keeps it out
    # of the image layer.
    rm -f "${tarball}"

    # Replace any previous installation: moving onto an existing ./home
    # directory would nest the new tree inside it rather than replace it.
    rm -rf ./home
    mv "hadoop-${HADOOP_VERSION}" home &&
        chown -R "$(whoami)" /usr/local/hadoop || die "error moving hadoop into /usr/local/hadoop/home"
    popd
}

# Create the 'hduser' and 'hadoop' accounts, each with a home directory, bash
# as login shell and the password "<name>123".
#
# The login shell is set by useradd itself. The previous
# `useradd && adduser && chsh` chain ran adduser on the account that useradd
# had just created; adduser refuses an already-existing user, so the chain
# stopped there and chsh never ran.
#
# NOTE(review): the passwords are hard-coded, well-known values; acceptable
# only for throwaway test images.
#
# Returns: 0 on success, 1 as soon as an account cannot be created or its
#          password cannot be set.
function create_hadoop_user {
    local account
    for account in hduser hadoop; do
        # Skip creation when the account already exists so the script can be
        # re-run; the password is (re)set either way.
        if ! id -u "${account}" >/dev/null 2>&1; then
            useradd -m -s /bin/bash "${account}" || return 1
        fi
        echo -e "${account}123\n${account}123\n" | passwd "${account}" || return 1
    done
}

# Generate $HADOOP_HOME/etc/hadoop/core-site.xml, replacing any existing file.
# The generated configuration sets hadoop.tmp.dir and points fs.default.name
# at hdfs://localhost:9000.
#
# Side effect: exports HADOOP_HOME=/usr/local/hadoop/home.
# Returns: non-zero if the file could not be written or moved into place.
function setup_core_xml {
    export HADOOP_HOME=/usr/local/hadoop/home
    local tmpfile=/tmp/hadoop_fafsa.xml
    local file="$HADOOP_HOME/etc/hadoop/core-site.xml"
    rm -rf "$file"
    # Truncate ('>') rather than append ('>>'): a temp file left behind by an
    # earlier, failed run would otherwise end up as a second XML document in
    # the generated configuration.
    # NOTE(review): "/tmp/hadooop" (three o's) looks like a typo; it is kept
    # unchanged so the data directory does not move.
    cat > "$tmpfile" <<EOF || return 1
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/tmp/hadooop</value>
<description>Temporary directories.</description>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
EOF
    mv "$tmpfile" "$file"
}

# Generate $HADOOP_HOME/etc/hadoop/mapred-site.xml, replacing any existing
# file. The generated configuration sets mapred.job.tracker to localhost:9010.
#
# Side effect: exports HADOOP_HOME=/usr/local/hadoop/home.
# Returns: non-zero if the file could not be written or moved into place.
function setup_mapred_xml {
    export HADOOP_HOME=/usr/local/hadoop/home
    local tmpfile=/tmp/hadoop_mapred.xml
    local file="$HADOOP_HOME/etc/hadoop/mapred-site.xml"
    rm -rf "$file"
    # Truncate ('>') rather than append ('>>'): a temp file left behind by an
    # earlier, failed run would otherwise end up as a second XML document in
    # the generated configuration.
    cat > "$tmpfile" <<EOT || return 1
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9010</value>
<description>The tracker of MapReduce</description>
</property>
</configuration>
EOT
    mv "$tmpfile" "$file"
}

# Generate $HADOOP_HOME/etc/hadoop/hdfs-site.xml, replacing any existing file.
# The generated configuration sets the replication factor to 1 and adds the
# client settings grouped under the "libhdfs3" marker (replica default,
# datanode replacement, short-circuit reads, RPC retry count and timeouts).
#
# Side effect: exports HADOOP_HOME=/usr/local/hadoop/home.
# Returns: non-zero if the file could not be written or moved into place.
function setup_hdfs_xml {
    export HADOOP_HOME=/usr/local/hadoop/home
    local tmpfile=/tmp/hadoop_hdfs.xml
    local file="$HADOOP_HOME/etc/hadoop/hdfs-site.xml"
    rm -rf "$file"
    # Truncate ('>') rather than append ('>>'): a temp file left behind by an
    # earlier, failed run would otherwise end up as a second XML document in
    # the generated configuration.
    cat > "$tmpfile" <<EOF || return 1
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

<property>
<name>dfs.replication</name>
<value>1</value>
</property>

<!-- libhdfs3 -->

<property>
<name>dfs.default.replica</name>
<value>1</value>
</property>

<property>
<name>output.replace-datanode-on-failure</name>
<value>false</value>
</property>

<property>
<name>dfs.client.read.shortcircuit</name>
<value>false</value>
</property>

<property>
<name>rpc.client.connect.retry</name>
<value>10</value>
</property>

<property>
<name>rpc.client.read.timeout</name>
<value>3600000</value>
</property>

<property>
<name>rpc.client.write.timeout</name>
<value>3600000</value>
</property>

</configuration>
EOF
    mv "$tmpfile" "$file"
}


# Configure the unpacked Hadoop installation: export HADOOP_HOME, patch
# JAVA_HOME in hadoop-env.sh and generate the core/mapred/hdfs site XML files.
# Aborts the script through die() if one of the XML generators fails.
function setup_environment {
export HADOOP_HOME=/usr/local/hadoop/home
# Rewrite the literal text `JAVA_HOME=${JAVA_HOME}` in hadoop-env.sh into
# `JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")`, i.e. a command
# substitution that resolves the real location of /usr/bin/java and strips the
# trailing "bin/java" from it.
# NOTE(review): sed only touches lines containing that exact text, and its
# exit status is not checked -- confirm that the hadoop-env.sh shipped with the
# selected Hadoop version actually contains it, otherwise this is a silent
# no-op and JAVA_HOME stays unset.
sed -i -- 's/JAVA_HOME=\${JAVA_HOME}/JAVA_HOME=\$(readlink -f \/usr\/bin\/java | sed "s:bin\/java::")/' \
$HADOOP_HOME/etc/hadoop/hadoop-env.sh
# Generate the three site configuration files; stop at the first failure.
setup_core_xml &&
setup_mapred_xml &&
setup_hdfs_xml || die "error in generating xml configuration files"
}

# Set up key-based ssh to the local machine for the current user:
# start from a fresh ~/.ssh, (re)install the OpenSSH server and client,
# generate an RSA key pair with an empty passphrase, authorize its public key,
# record the host keys of localhost / 127.0.0.1 / 0.0.0.0 in known_hosts, and
# restart the ssh service. Failing to install or restart aborts through die().
function passwordless_ssh {
    local ssh_dir host
    ssh_dir=~/.ssh

    # Only an existing *directory* is removed, exactly as before.
    [ ! -d "${ssh_dir}" ] || rm -rf "${ssh_dir}"

    apt-get --reinstall install -y openssh-server openssh-client || die "error (re)installing openssh"

    mkdir "${ssh_dir}"
    ssh-keygen -t rsa -P "" -f "${ssh_dir}/id_rsa"
    cat "${ssh_dir}/id_rsa.pub" >> "${ssh_dir}/authorized_keys"

    # Hashed (-H) host keys for every name the local machine is reached by.
    for host in localhost 127.0.0.1 0.0.0.0; do
        ssh-keyscan -H "${host}" >> "${ssh_dir}/known_hosts"
    done

    service ssh restart || die "error restarting ssh service"
}

# Script entry point: execute every installation step in order. The first step
# that fails aborts the script through die() with that step's error message.
function run {
    local step
    # Each entry is "<function to call>:<message passed to die on failure>".
    for step in \
        "update_apt_repo:error updating apt-repo" \
        "install_java:error installing java" \
        "create_hadoop_user:error creating hadoop user" \
        "install_hadoop:error installing hadoop" \
        "setup_environment:error setting up environment" \
        "passwordless_ssh:error setting up passwordless ssh"
    do
        "${step%%:*}" || die "${step#*:}"
    done
}

run
# Give the ssh service restart triggered by the last step a moment to finish
# before the script returns (the original note attributes the delay to systemd).
sleep 2
10 changes: 3 additions & 7 deletions release/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,16 @@ ENV pyversion=$pyversion SETUPTOOLS_SCM_PRETEND_VERSION=$pyversion
# -----------------------------------------------------------------------------

# Install Python bindings
RUN wget -P /tmp https://bootstrap.pypa.io/get-pip.py \
&& python3 /tmp/get-pip.py \
&& rm /tmp/get-pip.py \
&& wget https://github.com/TileDB-Inc/TileDB-Py/archive/${pyversion}.tar.gz -O /home/tiledb/Py-${pyversion}.tar.gz \
RUN wget https://github.com/TileDB-Inc/TileDB-Py/archive/${pyversion}.tar.gz -O /home/tiledb/Py-${pyversion}.tar.gz \
&& tar xzf /home/tiledb/Py-${pyversion}.tar.gz -C /home/tiledb \
&& rm /home/tiledb/Py-${pyversion}.tar.gz \
&& cd /home/tiledb/TileDB-Py-${pyversion} \
&& pip install -r requirements.txt \
&& python3.5 setup.py install --tiledb=/usr/local \
&& pip3 install -r requirements.txt \
&& python3 setup.py install --tiledb=/usr/local \
&& rm -rf /home/tiledb/TileDB-Py-${pyversion}

EXPOSE 22

# this can be removed for TileDB-Py 0.4.3
ENV LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH"

WORKDIR /home/tiledb
Expand Down