From 092ba82146ddfbe6e192e9fde6dfe755a519f9ff Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 27 Apr 2014 20:04:43 +0100 Subject: [PATCH 01/97] Initial commit --- hbase-0.94.18/build | 13 ++ hbase-0.94.18/hbase-base/Dockerfile | 52 +++++++ hbase-0.94.18/hbase-base/build | 4 + .../hbase-base/files/configure_hbase.sh | 42 ++++++ hbase-0.94.18/hbase-base/files/hbase-env.sh | 135 ++++++++++++++++++ hbase-0.94.18/hbase-base/files/hbase-site.xml | 60 ++++++++ .../hbase-base/files/log4j.properties | 8 ++ hbase-0.94.18/hbase-base/files/zoo.cfg | 3 + hbase-0.94.18/hbase-master/Dockerfile | 7 + hbase-0.94.18/hbase-master/build | 4 + hbase-0.94.18/hbase-master/files/default_cmd | 25 ++++ .../hbase-master/files/run_hbase_master.sh | 11 ++ hbase-0.94.18/hbase-worker/Dockerfile | 13 ++ hbase-0.94.18/hbase-worker/build | 4 + hbase-0.94.18/hbase-worker/files/default_cmd | 22 +++ .../hbase-worker/files/run_hbase_worker.sh | 5 + 16 files changed, 408 insertions(+) create mode 100644 hbase-0.94.18/build create mode 100644 hbase-0.94.18/hbase-base/Dockerfile create mode 100644 hbase-0.94.18/hbase-base/build create mode 100644 hbase-0.94.18/hbase-base/files/configure_hbase.sh create mode 100644 hbase-0.94.18/hbase-base/files/hbase-env.sh create mode 100644 hbase-0.94.18/hbase-base/files/hbase-site.xml create mode 100644 hbase-0.94.18/hbase-base/files/log4j.properties create mode 100644 hbase-0.94.18/hbase-base/files/zoo.cfg create mode 100644 hbase-0.94.18/hbase-master/Dockerfile create mode 100644 hbase-0.94.18/hbase-master/build create mode 100644 hbase-0.94.18/hbase-master/files/default_cmd create mode 100644 hbase-0.94.18/hbase-master/files/run_hbase_master.sh create mode 100644 hbase-0.94.18/hbase-worker/Dockerfile create mode 100644 hbase-0.94.18/hbase-worker/build create mode 100644 hbase-0.94.18/hbase-worker/files/default_cmd create mode 100644 hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh diff --git a/hbase-0.94.18/build b/hbase-0.94.18/build new file mode 100644 index 
0000000..9610006 --- /dev/null +++ b/hbase-0.94.18/build @@ -0,0 +1,13 @@ +#!/bin/bash + +hbase_dirs=$(ls -d hbase*) +dir_list=("$hbase_dirs") + +# NOTE: the order matters but this is the right one +for i in ${dir_list[@]}; do + echo building $i; + cd $i; + cat build; + . build; + cd ..; +done diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile new file mode 100644 index 0000000..4e91675 --- /dev/null +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -0,0 +1,52 @@ +# HBASE 0.94.18 +# +FROM apache-hadoop-hdfs-precise:1.2.1 + +#MAINTAINER amplab amp-docker@eecs.berkeley.edu + +# Download and Install HBase +ENV HBASE_VERSION 0.94.18 + +RUN mkdir -p /opt/downloads && cd /opt/downloads && curl -SsfLO "http://www.apache.org/dist/hbase/hbase-$HBASE_VERSION/hbase-$HBASE_VERSION.tar.gz" +RUN cd /opt && tar xvfz /opt/downloads/hbase-$HBASE_VERSION.tar.gz +# RUN mv /opt/hbase-$HBASE_VERSION /opt/hbase +# Add symlink +RUN rm /opt/hbase && ln -s /opt/hbase-$HBASE_VERSION /opt/hbase + +# Data will go here (see hbase-site.xml) +# RUN mkdir -p /data/hbase /opt/hbase/logs + +# ENV JAVA_HOME /usr/lib/jvm/java-6-openjdk-amd64 +ENV HBASE_HOME /opt/hbase +ENV HBASE_PREFIX /opt/hbase +ENV PATH $PATH:$HBASE_HOME/bin +ENV HBASE_CONF_DIR /opt/hbase/conf +#ENV HBASE_SERVER /opt/hbase/bin/hbase + +#ADD ./hbase-site.xml /opt/hbase/conf/hbase-site.xml +#ADD ./zoo.cfg /opt/hbase/conf/zoo.cfg +#ADD ./hbase-server /opt/hbase-server + +RUN rm $HBASE_HOME/conf/hbase-site.xml +ADD files/hbase-site.xml $HBASE_HOME/conf/hbase-site.xml + +RUN rm $HBASE_HOME/conf/hbase-env.sh +ADD files/hbase-env.sh $HBASE_HOME/conf/hbase-env.sh + +# Ports reference +# http://blog.cloudera.com/blog/2013/07/guide-to-using-apache-hbase-ports/ + +# Expose HBase Master Ports +EXPOSE 60000 60010 + +# Expose HBase Region Server Ports +EXPOSE 60020 60030 + +# Expose REST Server Ports +EXPOSE 8080 8085 + +# Expose Thrift Ports +EXPOSE 9090 9095 + +ADD files /root/hbase_files + diff --git 
a/hbase-0.94.18/hbase-base/build b/hbase-0.94.18/hbase-base/build new file mode 100644 index 0000000..1d80fd9 --- /dev/null +++ b/hbase-0.94.18/hbase-base/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}hbase-base:0.94.18 . diff --git a/hbase-0.94.18/hbase-base/files/configure_hbase.sh b/hbase-0.94.18/hbase-base/files/configure_hbase.sh new file mode 100644 index 0000000..4753a22 --- /dev/null +++ b/hbase-0.94.18/hbase-base/files/configure_hbase.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +source /root/hadoop_files/configure_hadoop.sh + +function create_hbase_directories() { + create_hadoop_directories + rm -rf /opt/hbase-$HBASE_VERSION/work + mkdir -p /opt/hbase-$HBASE_VERSION/work + chown hdfs.hdfs /opt/hbase-$HBASE_VERSION/work + mkdir /tmp/hbase + chown hdfs.hdfs /tmp/hbase + # this one is for hbase shell logging + rm -rf /var/lib/hadoop/hdfs + mkdir -p /var/lib/hadoop/hdfs + chown hdfs.hdfs /var/lib/hadoop/hdfs + rm -rf /opt/hbase-$HBASE_VERSION/logs + mkdir -p /opt/hbase-$HBASE_VERSION/logs + chown hdfs.hdfs /opt/hbase-$HBASE_VERSION/logs +} + +function deploy_hbase_files() { + deploy_hadoop_files + cp /root/hbase_files/hbase-env.sh /opt/hbase-$HBASE_VERSION/conf/ + cp /root/hbase_files/log4j.properties /opt/hbase-$HBASE_VERSION/conf/ +} + +function configure_hbase() { + configure_hadoop $1 + sed -i s/__MASTER__/master/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh + sed -i s/__HBASE_HOME__/"\/opt\/hbase-${HBASE_VERSION}"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh + sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh + + sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/hbase-site.xml + sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/zoo.cfg + echo "$IP $(hostname)" >> /etc/hosts +} + +function prepare_hbase() { + create_hbase_directories + 
deploy_hbase_files + configure_hbase $1 +} diff --git a/hbase-0.94.18/hbase-base/files/hbase-env.sh b/hbase-0.94.18/hbase-base/files/hbase-env.sh new file mode 100644 index 0000000..fe4ceee --- /dev/null +++ b/hbase-0.94.18/hbase-base/files/hbase-env.sh @@ -0,0 +1,135 @@ +# +#/** +# * Copyright 2007 The Apache Software Foundation +# * +# * Licensed to the Apache Software Foundation (ASF) under one +# * or more contributor license agreements. See the NOTICE file +# * distributed with this work for additional information +# * regarding copyright ownership. The ASF licenses this file +# * to you under the Apache License, Version 2.0 (the +# * "License"); you may not use this file except in compliance +# * with the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ + +# Set environment variables here. + +# This script sets variables multiple times over the course of starting an hbase process, +# so try to keep things idempotent unless you want to take an even deeper look +# into the startup scripts (bin/hbase, etc.) + +# The java implementation to use. Java 1.6 required. +# export JAVA_HOME=/usr/java/jdk1.6.0/ + +# Extra Java CLASSPATH elements. Optional. +# export HBASE_CLASSPATH= + +# The maximum amount of heap to use, in MB. Default is 1000. +# export HBASE_HEAPSIZE=1000 + +# Extra Java runtime options. +# Below are what we set by default. May only work with SUN JVM. 
+# For more on why as well as other possible settings, +# see http://wiki.apache.org/hadoop/PerformanceTuning +export HBASE_OPTS="-XX:+UseConcMarkSweepGC" + +# Uncomment one of the below three options to enable java garbage collection logging for the server-side processes. + +# This enables basic gc logging to the .out file. +# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" + +# This enables basic gc logging to its own file. +# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . +# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" + +# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. +# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . +# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" + +# Uncomment one of the below three options to enable java garbage collection logging for the client processes. + +# This enables basic gc logging to the .out file. +# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" + +# This enables basic gc logging to its own file. +# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . +# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:" + +# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+. +# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR . +# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc: -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M" + +# Uncomment below if you intend to use the EXPERIMENTAL off heap cache. 
+# export HBASE_OPTS="$HBASE_OPTS -XX:MaxDirectMemorySize=" +# Set hbase.offheapcache.percentage in hbase-site.xml to a nonzero value. + + +# Uncomment and adjust to enable JMX exporting +# See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access. +# More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html +# +# export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false" +# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101" +# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102" +# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103" +# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104" +# export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105" + +# File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default. +# export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers + +# Uncomment and adjust to keep all the Region Server pages mapped to be memory resident +#HBASE_REGIONSERVER_MLOCK=true +#HBASE_REGIONSERVER_UID="hbase" + +# File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default. +# export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters + +# Extra ssh options. Empty by default. +# export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR" + +# Where log files are stored. $HBASE_HOME/logs by default. +# export HBASE_LOG_DIR=${HBASE_HOME}/logs + +# Enable remote JDWP debugging of major HBase processes. 
Meant for Core Developers +# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070" +# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071" +# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072" +# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073" + +# A string representing this instance of hbase. $USER by default. +# export HBASE_IDENT_STRING=$USER + +# The scheduling priority for daemon processes. See 'man nice'. +# export HBASE_NICENESS=10 + +# The directory where pid files are stored. /tmp by default. +# export HBASE_PID_DIR=/var/hadoop/pids + +# Seconds to sleep between slave commands. Unset by default. This +# can be useful in large clusters, where, e.g., slave rsyncs can +# otherwise arrive faster than the master can service them. +# export HBASE_SLAVE_SLEEP=0.1 + +# Tell HBase whether it should manage it's own instance of Zookeeper or not. +# export HBASE_MANAGES_ZK=true + +# The default log rolling policy is RFA, where the log file is rolled as per the size defined for the +# RFA appender. Please refer to the log4j.properties file to see more details on this appender. +# In case one needs to do log rolling on a date change, one should set the environment property +# HBASE_ROOT_LOGGER to ",DRFA". +# For example: +# HBASE_ROOT_LOGGER=INFO,DRFA +# The reason for changing default to RFA is to avoid the boundary case of filling out disk space as +# DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context. 
+ +export HBASE_HOME=__HBASE_HOME__ +export HBASE_MASTER_IP=__MASTER__ +export JAVA_HOME=__JAVA_HOME__ diff --git a/hbase-0.94.18/hbase-base/files/hbase-site.xml b/hbase-0.94.18/hbase-base/files/hbase-site.xml new file mode 100644 index 0000000..a0a1b46 --- /dev/null +++ b/hbase-0.94.18/hbase-base/files/hbase-site.xml @@ -0,0 +1,60 @@ + + + + + hbase.zookeeper.quorum + @IP@ + + + hbase.rootdir + hdfs://@IP@:9000/hbase + + + hbase.cluster.distributed + true + + + + hbase.master.dns.interface + eth0 + + + + + + + + + hbase.regionserver.dns.interface + eth0 + + + + + + + hbase.zookeeper.dns.interface + eth0 + + + diff --git a/hbase-0.94.18/hbase-base/files/log4j.properties b/hbase-0.94.18/hbase-base/files/log4j.properties new file mode 100644 index 0000000..d72dbad --- /dev/null +++ b/hbase-0.94.18/hbase-base/files/log4j.properties @@ -0,0 +1,8 @@ +# Set everything to be logged to the console +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.eclipse.jetty=WARN diff --git a/hbase-0.94.18/hbase-base/files/zoo.cfg b/hbase-0.94.18/hbase-base/files/zoo.cfg new file mode 100644 index 0000000..328d1e5 --- /dev/null +++ b/hbase-0.94.18/hbase-base/files/zoo.cfg @@ -0,0 +1,3 @@ +clientPort=2181 +clientPortAddress=@IP@ +server.1=@IP@:2181 diff --git a/hbase-0.94.18/hbase-master/Dockerfile b/hbase-0.94.18/hbase-master/Dockerfile new file mode 100644 index 0000000..cffa57d --- /dev/null +++ b/hbase-0.94.18/hbase-master/Dockerfile @@ -0,0 +1,7 @@ +# Spark +FROM hbase-base:0.94.18 +#MAINTAINER amplab amp-docker@eecs.berkeley.edu + +ADD files /root/hbase_master_files + +CMD ["/root/hbase_master_files/default_cmd"] diff --git a/hbase-0.94.18/hbase-master/build b/hbase-0.94.18/hbase-master/build new file 
mode 100644 index 0000000..44af856 --- /dev/null +++ b/hbase-0.94.18/hbase-master/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}hbase-master:0.94.18 . diff --git a/hbase-0.94.18/hbase-master/files/default_cmd b/hbase-0.94.18/hbase-master/files/default_cmd new file mode 100644 index 0000000..87e7009 --- /dev/null +++ b/hbase-0.94.18/hbase-master/files/default_cmd @@ -0,0 +1,25 @@ +#!/bin/bash + +env + +source /root/hbase_files/configure_hbase.sh + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "MASTER_IP=$IP" + +echo "preparing Hbase" +prepare_hbase $IP + +echo "starting Hadoop Namenode" +sudo -u hdfs hadoop namenode -format > /dev/null 2>&1 +service hadoop-namenode start > /dev/null 2>&1 + +echo "starting sshd" +/usr/sbin/sshd + +sleep 5 + +echo "starting Hbase Master" +cp /root/hbase_master_files/run_hbase_master.sh / +chmod a+rx /run_hbase_master.sh +sudo -u hdfs HBASE_VERSION=$HBASE_VERSION /run_hbase_master.sh diff --git a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh new file mode 100644 index 0000000..6ce7388 --- /dev/null +++ b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh @@ -0,0 +1,11 @@ +#!/bin/bash +#/opt/spark-0.9.0/sbin/start-master.sh + +"${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start zookeeper +"${HBASE_HOME}/bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master + +while [ 1 ]; +do + tail -f /opt/hbase-${HBASE_VERSION}/logs/*.out + sleep 1 +done diff --git a/hbase-0.94.18/hbase-worker/Dockerfile b/hbase-0.94.18/hbase-worker/Dockerfile new file mode 100644 index 0000000..78efd9f --- /dev/null +++ b/hbase-0.94.18/hbase-worker/Dockerfile @@ -0,0 +1,13 @@ +# Spark +FROM hbase-base:0.94.18 +#MAINTAINER amplab 
amp-docker@eecs.berkeley.edu + +# Instead of using a random port, bind the worker to a specific port +#ENV SPARK_WORKER_PORT 8888 +#EXPOSE 8888 + +ADD files /root/hbase_worker_files + +# Add the entrypoint script for the master +CMD ["-h"] +ENTRYPOINT ["/root/hbase_worker_files/default_cmd"] diff --git a/hbase-0.94.18/hbase-worker/build b/hbase-0.94.18/hbase-worker/build new file mode 100644 index 0000000..82a2981 --- /dev/null +++ b/hbase-0.94.18/hbase-worker/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}hbase-worker:0.94.18 . diff --git a/hbase-0.94.18/hbase-worker/files/default_cmd b/hbase-0.94.18/hbase-worker/files/default_cmd new file mode 100644 index 0000000..fe2ce9f --- /dev/null +++ b/hbase-0.94.18/hbase-worker/files/default_cmd @@ -0,0 +1,22 @@ +#!/bin/bash + +source /root/hbase_files/configure_hbase.sh + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "WORKER_IP=$IP" + +echo "preparing HBase" +prepare_hbase $1 + +echo "starting Hadoop Datanode" +service hadoop-datanode start + +echo "starting sshd" +/usr/sbin/sshd + +sleep 5 + +echo "starting HBase Worker" +cp /root/hbase_worker_files/run_hbase_worker.sh / +chmod a+rx /run_hbase_worker.sh +sudo -u hdfs /run_hbase_worker.sh diff --git a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh new file mode 100644 index 0000000..a5f651e --- /dev/null +++ b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh @@ -0,0 +1,5 @@ +#!/bin/bash +. 
/opt/hbase-0.94.18/conf/hbase-env.sh +# ${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.worker.Worker $MASTER + +"${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start regionserver From 32d4f70e17025e66447a589d24ca4a82c3fd8e68 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 27 Apr 2014 21:36:02 +0100 Subject: [PATCH 02/97] Added tzdata-java dependency --- apache-hadoop-hdfs-precise/Dockerfile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/apache-hadoop-hdfs-precise/Dockerfile b/apache-hadoop-hdfs-precise/Dockerfile index ea8bac4..e75360d 100644 --- a/apache-hadoop-hdfs-precise/Dockerfile +++ b/apache-hadoop-hdfs-precise/Dockerfile @@ -1,4 +1,4 @@ -# Base Ubuntu Precise 12.04 LTS image +# Base Ubuntu Precise 12.04 LTS image # FROM ubuntu:precise MAINTAINER amplab amp-docker@eecs.berkeley.edu @@ -9,6 +9,11 @@ VOLUME ["/data"] # Set correct source list RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list +# Openjdk dependency +# Ref: http://superuser.com/questions/478284/dependencies-problems-installing-openjdk-on-ubuntu +ADD http://160.26.2.181/ports/pool/main/t/tzdata/tzdata-java_2014a-0ubuntu0.12.04_all.deb /root/ +RUN dpkg -i /root/tzdata-java_2014a-0ubuntu0.12.04_all.deb + # install a few other useful packages plus Open Jdk 7 RUN apt-get update && apt-get upgrade -y && apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server iputils-ping From 1a520effff5f73fae9138724f26bca971cf935b1 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 27 Apr 2014 21:55:33 +0100 Subject: [PATCH 03/97] Corrected tzdata-java version --- apache-hadoop-hdfs-precise/Dockerfile | 66 +++++++++++++-------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/apache-hadoop-hdfs-precise/Dockerfile b/apache-hadoop-hdfs-precise/Dockerfile index e75360d..3fa1d49 100644 --- a/apache-hadoop-hdfs-precise/Dockerfile +++ b/apache-hadoop-hdfs-precise/Dockerfile @@ -1,33 
+1,33 @@ -# Base Ubuntu Precise 12.04 LTS image -# -FROM ubuntu:precise -MAINTAINER amplab amp-docker@eecs.berkeley.edu - -# Setup a volume for data -VOLUME ["/data"] - -# Set correct source list -RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list - -# Openjdk dependency -# Ref: http://superuser.com/questions/478284/dependencies-problems-installing-openjdk-on-ubuntu -ADD http://160.26.2.181/ports/pool/main/t/tzdata/tzdata-java_2014a-0ubuntu0.12.04_all.deb /root/ -RUN dpkg -i /root/tzdata-java_2014a-0ubuntu0.12.04_all.deb - -# install a few other useful packages plus Open Jdk 7 -RUN apt-get update && apt-get upgrade -y && apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server iputils-ping - -# Install Hadoop -ADD http://mirror.sdunix.com/apache/hadoop/common/hadoop-1.2.1/hadoop_1.2.1-1_x86_64.deb /root/ -RUN dpkg -i /root/hadoop_1.2.1-1_x86_64.deb && rm /root/hadoop_1.2.1-1_x86_64.deb - -# Docker messes up /etc/hosts and adds two entries for 127.0.0.1 -# we try to recover from that by giving /etc/resolv.conf and therefore -# the nameserver priority -RUN sed -i s/"files dns"/"dns files"/ /etc/nsswitch.conf - -# add Hadoop config file templates -ADD files /root/hadoop_files - -# Set JAVA_HOME -ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64 +# Base Ubuntu Precise 12.04 LTS image +# +FROM ubuntu:precise +MAINTAINER amplab amp-docker@eecs.berkeley.edu + +# Setup a volume for data +VOLUME ["/data"] + +# Set correct source list +RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list + +# Openjdk dependency +# Ref: http://superuser.com/questions/478284/dependencies-problems-installing-openjdk-on-ubuntu +ADD http://160.26.2.181/ports/pool/main/t/tzdata/tzdata-java_2013g-0ubuntu0.12.04_all.deb /root/tzdata-java_2013g-0ubuntu0.12.04_all.deb +RUN dpkg -i /root/tzdata-java_2013g-0ubuntu0.12.04_all.deb && rm /root/tzdata-java_2013g-0ubuntu0.12.04_all.deb + +# install 
a few other useful packages plus Open Jdk 7 +RUN apt-get update && apt-get upgrade -y && apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server iputils-ping + +# Install Hadoop +ADD http://mirror.sdunix.com/apache/hadoop/common/hadoop-1.2.1/hadoop_1.2.1-1_x86_64.deb /root/ +RUN dpkg -i /root/hadoop_1.2.1-1_x86_64.deb && rm /root/hadoop_1.2.1-1_x86_64.deb + +# Docker messes up /etc/hosts and adds two entries for 127.0.0.1 +# we try to recover from that by giving /etc/resolv.conf and therefore +# the nameserver priority +RUN sed -i s/"files dns"/"dns files"/ /etc/nsswitch.conf + +# add Hadoop config file templates +ADD files /root/hadoop_files + +# Set JAVA_HOME +ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64 From e9bbad625263a9773547772642b9e4800c51dc89 Mon Sep 17 00:00:00 2001 From: htaox Date: Mon, 28 Apr 2014 16:51:37 -0400 Subject: [PATCH 04/97] Added execute permission for scripts --- build/build_all_hbase.sh | 22 ++++ deploy/deploy_hbase.sh | 115 ++++++++++++++++++ hbase-0.94.18/hbase-base/Dockerfile | 9 +- .../hbase-base/files/configure_hbase.sh | 84 ++++++------- hbase-0.94.18/hbase-master/Dockerfile | 8 +- hbase-0.94.18/hbase-worker/Dockerfile | 6 +- 6 files changed, 192 insertions(+), 52 deletions(-) create mode 100644 build/build_all_hbase.sh create mode 100644 deploy/deploy_hbase.sh diff --git a/build/build_all_hbase.sh b/build/build_all_hbase.sh new file mode 100644 index 0000000..d03fe0c --- /dev/null +++ b/build/build_all_hbase.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +if [[ "$USER" != "root" ]]; then + echo "please run as: sudo $0" + exit 1 +fi + +CURDIR=$(pwd) +BASEDIR=$(cd $(dirname $0); pwd)"/.." 
+dir_list=( "hbase-0.94.18" ) + +export IMAGE_PREFIX="htaox/" +#"amplab/" + +# NOTE: the order matters but this is the right one +for i in ${dir_list[@]}; do + echo building $i; + cd ${BASEDIR}/$i + cat build + ./build +done +cd $CURDIR diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh new file mode 100644 index 0000000..7a90877 --- /dev/null +++ b/deploy/deploy_hbase.sh @@ -0,0 +1,115 @@ +#!/bin/bash + +DEBUG=0 +BASEDIR=$(cd $(dirname $0); pwd) + +hbase_images=( "htaox/hbase:0.94.18") +NAMESERVER_IMAGE="amplab/dnsmasq-precise" + +start_shell=0 +VOLUME_MAP="" + +image_type="?" +image_version="?" +NUM_WORKERS=2 + +source $BASEDIR/start_nameserver.sh +source $BASEDIR/start_hbase_cluster.sh + +function check_root() { + if [[ "$USER" != "root" ]]; then + echo "please run as: sudo $0" + exit 1 + fi +} + +function print_help() { + echo "usage: $0 -i [-w <#workers>] [-v ] [-c]" + echo "" + echo " image: hbase image from:" + echo -n " " + for i in ${hbase_images[@]}; do + echo -n " $i" + done + echo "" +} + +function parse_options() { + while getopts "i:w:cv:h" opt; do + case $opt in + i) + echo "$OPTARG" | grep "hbase:" > /dev/null; + if [ "$?" -eq 0 ]; then + image_type="hbase" + fi + image_name=$(echo "$OPTARG" | awk -F ":" '{print $1}') + image_version=$(echo "$OPTARG" | awk -F ":" '{print $2}') + ;; + w) + NUM_WORKERS=$OPTARG + ;; + h) + print_help + exit 0 + ;; + c) + start_shell=1 + ;; + v) + VOLUME_MAP=$OPTARG + ;; + esac + done + + if [ "$image_type" == "?" ]; then + echo "missing or invalid option: -i " + exit 1 + fi + + if [ ! 
"$VOLUME_MAP" == "" ]; then + echo "data volume chosen: $VOLUME_MAP" + VOLUME_MAP="-v $VOLUME_MAP:/data" + fi +} + +check_root + +if [[ "$#" -eq 0 ]]; then + print_help + exit 1 +fi + +parse_options $@ + +if [ "$image_type" == "hbase" ]; then + hbase_VERSION="$image_version" + echo "*** Starting hbase $hbase_VERSION ***" +else + echo "not starting anything" + exit 0 +fi + +start_nameserver $NAMESERVER_IMAGE +wait_for_nameserver +start_master ${image_name}-master $image_version +wait_for_master +if [ "$image_type" == "hbase" ]; then + SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$hbase_VERSION -n $NAMESERVER $VOLUME_MAP" +elif [ "$image_type" == "shark" ]; then + SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$SHARK_VERSION -n $NAMESERVER $VOLUME_MAP" +fi + +start_workers ${image_name}-worker $image_version +get_num_registered_workers +echo -n "waiting for workers to register " +until [[ "$NUM_REGISTERED_WORKERS" == "$NUM_WORKERS" ]]; do + echo -n "." 
+ sleep 1 + get_num_registered_workers +done +echo "" +print_cluster_info "$SHELLCOMMAND" +if [[ "$start_shell" -eq 1 ]]; then + SHELL_ID=$($SHELLCOMMAND | tail -n 1 | awk '{print $4}') + sudo docker attach $SHELL_ID +fi diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index 4e91675..5b244f2 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -2,16 +2,19 @@ # FROM apache-hadoop-hdfs-precise:1.2.1 -#MAINTAINER amplab amp-docker@eecs.berkeley.edu +MAINTAINER htaox htaox@hotmail.com # Download and Install HBase ENV HBASE_VERSION 0.94.18 -RUN mkdir -p /opt/downloads && cd /opt/downloads && curl -SsfLO "http://www.apache.org/dist/hbase/hbase-$HBASE_VERSION/hbase-$HBASE_VERSION.tar.gz" +#RUN mkdir -p /opt/downloads && cd /opt/downloads && curl -SsfLO "http://www.apache.org/dist/hbase/hbase-$HBASE_VERSION/hbase-$HBASE_VERSION.tar.gz" +RUN mkdir -p /opt/downloads +ADD http://www.apache.org/dist/hbase/hbase-$HBASE_VERSION/hbase-$HBASE_VERSION.tar.gz /opt/downloads/hbase-$HBASE_VERSION.tar.gz + RUN cd /opt && tar xvfz /opt/downloads/hbase-$HBASE_VERSION.tar.gz # RUN mv /opt/hbase-$HBASE_VERSION /opt/hbase # Add symlink -RUN rm /opt/hbase && ln -s /opt/hbase-$HBASE_VERSION /opt/hbase +RUN ln -s /opt/hbase-$HBASE_VERSION /opt/hbase # Data will go here (see hbase-site.xml) # RUN mkdir -p /data/hbase /opt/hbase/logs diff --git a/hbase-0.94.18/hbase-base/files/configure_hbase.sh b/hbase-0.94.18/hbase-base/files/configure_hbase.sh index 4753a22..ddc58f9 100644 --- a/hbase-0.94.18/hbase-base/files/configure_hbase.sh +++ b/hbase-0.94.18/hbase-base/files/configure_hbase.sh @@ -1,42 +1,42 @@ -#!/bin/bash - -source /root/hadoop_files/configure_hadoop.sh - -function create_hbase_directories() { - create_hadoop_directories - rm -rf /opt/hbase-$HBASE_VERSION/work - mkdir -p /opt/hbase-$HBASE_VERSION/work - chown hdfs.hdfs /opt/hbase-$HBASE_VERSION/work - mkdir /tmp/hbase - chown hdfs.hdfs /tmp/hbase - # this 
one is for hbase shell logging - rm -rf /var/lib/hadoop/hdfs - mkdir -p /var/lib/hadoop/hdfs - chown hdfs.hdfs /var/lib/hadoop/hdfs - rm -rf /opt/hbase-$HBASE_VERSION/logs - mkdir -p /opt/hbase-$HBASE_VERSION/logs - chown hdfs.hdfs /opt/hbase-$HBASE_VERSION/logs -} - -function deploy_hbase_files() { - deploy_hadoop_files - cp /root/hbase_files/hbase-env.sh /opt/hbase-$HBASE_VERSION/conf/ - cp /root/hbase_files/log4j.properties /opt/hbase-$HBASE_VERSION/conf/ -} - -function configure_hbase() { - configure_hadoop $1 - sed -i s/__MASTER__/master/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh - sed -i s/__HBASE_HOME__/"\/opt\/hbase-${HBASE_VERSION}"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh - sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh - - sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/hbase-site.xml - sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/zoo.cfg - echo "$IP $(hostname)" >> /etc/hosts -} - -function prepare_hbase() { - create_hbase_directories - deploy_hbase_files - configure_hbase $1 -} +#!/bin/bash + +source /root/hadoop_files/configure_hadoop.sh + +function create_hbase_directories() { + create_hadoop_directories + rm -rf /opt/hbase-$HBASE_VERSION/work + mkdir -p /opt/hbase-$HBASE_VERSION/work + chown hdfs.hdfs /opt/hbase-$HBASE_VERSION/work + mkdir /tmp/hbase + chown hdfs.hdfs /tmp/hbase + # this one is for hbase shell logging + rm -rf /var/lib/hadoop/hdfs + mkdir -p /var/lib/hadoop/hdfs + chown hdfs.hdfs /var/lib/hadoop/hdfs + rm -rf /opt/hbase-$HBASE_VERSION/logs + mkdir -p /opt/hbase-$HBASE_VERSION/logs + chown hdfs.hdfs /opt/hbase-$HBASE_VERSION/logs +} + +function deploy_hbase_files() { + deploy_hadoop_files + cp /root/hbase_files/hbase-env.sh /opt/hbase-$HBASE_VERSION/conf/ + cp /root/hbase_files/log4j.properties /opt/hbase-$HBASE_VERSION/conf/ +} + +function configure_hbase() { + configure_hadoop $1 + sed -i s/__MASTER__/master/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh + sed -i 
s/__HBASE_HOME__/"\/opt\/hbase-${HBASE_VERSION}"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh + sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh + + sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/hbase-site.xml + sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/zoo.cfg + echo "$IP $(hostname)" >> /etc/hosts +} + +function prepare_hbase() { + create_hbase_directories + deploy_hbase_files + configure_hbase $1 +} diff --git a/hbase-0.94.18/hbase-master/Dockerfile b/hbase-0.94.18/hbase-master/Dockerfile index cffa57d..dfa14c0 100644 --- a/hbase-0.94.18/hbase-master/Dockerfile +++ b/hbase-0.94.18/hbase-master/Dockerfile @@ -1,7 +1,7 @@ -# Spark -FROM hbase-base:0.94.18 -#MAINTAINER amplab amp-docker@eecs.berkeley.edu +# HBase +FROM htaox/hbase-base:0.94.18 +MAINTAINER htaox htaox@hotmail.com ADD files /root/hbase_master_files - +RUN chmod 700 /root/hbase_master_files/default_cmd CMD ["/root/hbase_master_files/default_cmd"] diff --git a/hbase-0.94.18/hbase-worker/Dockerfile b/hbase-0.94.18/hbase-worker/Dockerfile index 78efd9f..e746850 100644 --- a/hbase-0.94.18/hbase-worker/Dockerfile +++ b/hbase-0.94.18/hbase-worker/Dockerfile @@ -1,13 +1,13 @@ # Spark -FROM hbase-base:0.94.18 -#MAINTAINER amplab amp-docker@eecs.berkeley.edu +FROM htaox/hbase-base:0.94.18 +MAINTAINER htaox htaox@hotmail.com # Instead of using a random port, bind the worker to a specific port #ENV SPARK_WORKER_PORT 8888 #EXPOSE 8888 ADD files /root/hbase_worker_files - +RUN chmod 700 /root/hbase_worker_files/default_cmd # Add the entrypoint script for the master CMD ["-h"] ENTRYPOINT ["/root/hbase_worker_files/default_cmd"] From be5f1bdb8963343eb5d256af0beb28a846d77600 Mon Sep 17 00:00:00 2001 From: htaox Date: Mon, 28 Apr 2014 17:07:48 -0400 Subject: [PATCH 05/97] Initial commit for starting cluster --- deploy/start_hbase_cluster.sh | 107 ++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 deploy/start_hbase_cluster.sh diff 
--git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh new file mode 100644 index 0000000..8bf519e --- /dev/null +++ b/deploy/start_hbase_cluster.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +MASTER=-1 +MASTER_IP= +NUM_REGISTERED_WORKERS=0 + +# starts the Spark/Shark master container +function start_master() { + echo "starting master container" + if [ "$DEBUG" -gt 0 ]; then + echo sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2 + fi + MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2) + + if [ "$MASTER" = "" ]; then + echo "error: could not start master container from image $1:$2" + exit 1 + fi + + echo "started master container: $MASTER" + sleep 3 + MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") + echo "MASTER_IP: $MASTER_IP" + echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE +} + +# starts a number of Spark/Shark workers +function start_workers() { + for i in `seq 1 $NUM_WORKERS`; do + echo "starting worker container" + hostname="worker${i}${DOMAINNAME}" + if [ "$DEBUG" -gt 0 ]; then + echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP} + fi + WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP}) + + if [ "$WORKER" = "" ]; then + echo "error: could not start worker container from image $1:$2" + exit 1 + fi + + echo "started worker container: $WORKER" + sleep 3 + WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") + echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE + done +} + +# prints out information on the cluster +function print_cluster_info() { + BASEDIR=$(cd $(dirname $0); pwd)"/.." 
+ echo "" + echo "***********************************************************************" + echo "start shell via: $1" + echo "" + echo "visit Spark WebUI at: http://$MASTER_IP:8080/" + echo "visit Hadoop Namenode at: http://$MASTER_IP:50070" + echo "ssh into master via: ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}" + echo "" + echo "/data mapped: $VOLUME_MAP" + echo "" + echo "kill master via: sudo docker kill $MASTER" + echo "***********************************************************************" + echo "" + echo "to enable cluster name resolution add the following line to _the top_ of your host's /etc/resolv.conf:" + echo "nameserver $NAMESERVER_IP" +} + +function get_num_registered_workers() { + if [[ "$SPARK_VERSION" == "0.7.3" ]]; then + DATA=$( curl --noproxy -s http://$MASTER_IP:8080/?format=json | tr -d '\n' | sed s/\"/\\\\\"/g) + else + # Docker on Mac uses tinycore Linux with busybox which has a limited version wget (?) + echo $(uname -a) | grep "Linux boot2docker" > /dev/null + if [[ "$?" == "0" ]]; then + DATA=$( wget -Y off -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) + else + DATA=$( wget --no-proxy -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) + fi + fi + NUM_REGISTERED_WORKERS=$(python -c "import json; data = \"$DATA\"; value = json.loads(data); print len(value['workers'])") +} + +function wait_for_master { + if [[ "$SPARK_VERSION" == "0.7.3" ]]; then + query_string="INFO HttpServer: akka://sparkMaster/user/HttpServer started" + else + query_string="MasterWebUI: Started Master web UI" + fi + echo -n "waiting for master " + sudo docker logs $MASTER | grep "$query_string" > /dev/null + until [ "$?" -eq 0 ]; do + echo -n "." 
+ sleep 1 + sudo docker logs $MASTER | grep "$query_string" > /dev/null; + done + echo "" + echo -n "waiting for nameserver to find master " + check_hostname result master "$MASTER_IP" + until [ "$result" -eq 0 ]; do + echo -n "." + sleep 1 + check_hostname result master "$MASTER_IP" + done + echo "" + sleep 3 +} From cc696160a0a99edd3b8b9f3c60bf1bfa2d0451bf Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 29 Apr 2014 13:18:44 -0400 Subject: [PATCH 06/97] Add zoo.cfg to HBase conf directory --- hbase-0.94.18/hbase-base/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index 5b244f2..dc3257a 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -36,6 +36,9 @@ ADD files/hbase-site.xml $HBASE_HOME/conf/hbase-site.xml RUN rm $HBASE_HOME/conf/hbase-env.sh ADD files/hbase-env.sh $HBASE_HOME/conf/hbase-env.sh +RUN rm $HBASE_HOME/conf/zoo.cfg +ADD files/zoo.cfg $HBASE_HOME/conf/zoo.cfg + # Ports reference # http://blog.cloudera.com/blog/2013/07/guide-to-using-apache-hbase-ports/ From 72769f90d4ba386b9d17b46c57b8b4583cb12287 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 29 Apr 2014 13:27:44 -0400 Subject: [PATCH 07/97] No need to remove before adding zoo.cfg to Hbase conf directory --- hbase-0.94.18/hbase-base/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index dc3257a..33d8cf2 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -36,7 +36,6 @@ ADD files/hbase-site.xml $HBASE_HOME/conf/hbase-site.xml RUN rm $HBASE_HOME/conf/hbase-env.sh ADD files/hbase-env.sh $HBASE_HOME/conf/hbase-env.sh -RUN rm $HBASE_HOME/conf/zoo.cfg ADD files/zoo.cfg $HBASE_HOME/conf/zoo.cfg # Ports reference From 9d02ef686bf223de42cc58c59a9cea3b686f55f0 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 29 Apr 2014 13:35:50 -0400 Subject: [PATCH 
08/97] Comment out updating /etc/hosts --- hbase-0.94.18/hbase-base/files/configure_hbase.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-0.94.18/hbase-base/files/configure_hbase.sh b/hbase-0.94.18/hbase-base/files/configure_hbase.sh index ddc58f9..ef5ef83 100644 --- a/hbase-0.94.18/hbase-base/files/configure_hbase.sh +++ b/hbase-0.94.18/hbase-base/files/configure_hbase.sh @@ -32,7 +32,7 @@ function configure_hbase() { sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/hbase-site.xml sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/zoo.cfg - echo "$IP $(hostname)" >> /etc/hosts + #echo "$1 $(hostname)" >> /etc/hosts } function prepare_hbase() { From 19642b70ed7a712505a558d73d61c3aac749fe5b Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 29 Apr 2014 15:30:55 -0400 Subject: [PATCH 09/97] Passed addtional named variables to run_hbase_master.sh --- hbase-0.94.18/hbase-master/files/default_cmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-0.94.18/hbase-master/files/default_cmd b/hbase-0.94.18/hbase-master/files/default_cmd index 87e7009..f89157b 100644 --- a/hbase-0.94.18/hbase-master/files/default_cmd +++ b/hbase-0.94.18/hbase-master/files/default_cmd @@ -22,4 +22,4 @@ sleep 5 echo "starting Hbase Master" cp /root/hbase_master_files/run_hbase_master.sh / chmod a+rx /run_hbase_master.sh -sudo -u hdfs HBASE_VERSION=$HBASE_VERSION /run_hbase_master.sh +sudo -u hdfs HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION /run_hbase_master.sh From de8e6c0ada4a6ff7622485f39ed2fdb95c9e3e47 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 29 Apr 2014 16:08:38 -0400 Subject: [PATCH 10/97] Just sleep for wait for master --- deploy/start_hbase_cluster.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index 8bf519e..68deb82 100644 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -82,18 +82,19 @@ 
function get_num_registered_workers() { } function wait_for_master { - if [[ "$SPARK_VERSION" == "0.7.3" ]]; then - query_string="INFO HttpServer: akka://sparkMaster/user/HttpServer started" - else - query_string="MasterWebUI: Started Master web UI" - fi + #if [[ "$SPARK_VERSION" == "0.7.3" ]]; then + # query_string="INFO HttpServer: akka://sparkMaster/user/HttpServer started" + #else + # query_string="MasterWebUI: Started Master web UI" + #fi echo -n "waiting for master " - sudo docker logs $MASTER | grep "$query_string" > /dev/null - until [ "$?" -eq 0 ]; do - echo -n "." - sleep 1 - sudo docker logs $MASTER | grep "$query_string" > /dev/null; - done + #sudo docker logs $MASTER | grep "$query_string" > /dev/null + #until [ "$?" -eq 0 ]; do + #echo -n "." + #sleep 1 + #sudo docker logs $MASTER | grep "$query_string" > /dev/null; + #done + sleep 15 echo "" echo -n "waiting for nameserver to find master " check_hostname result master "$MASTER_IP" From 0754a6def8f91014b5aea16424f91ca0171f8fe4 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 29 Apr 2014 16:57:49 -0400 Subject: [PATCH 11/97] Added ENV variables to startup HBase worker --- hbase-0.94.18/hbase-worker/files/default_cmd | 2 +- hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-0.94.18/hbase-worker/files/default_cmd b/hbase-0.94.18/hbase-worker/files/default_cmd index fe2ce9f..24bdf35 100644 --- a/hbase-0.94.18/hbase-worker/files/default_cmd +++ b/hbase-0.94.18/hbase-worker/files/default_cmd @@ -19,4 +19,4 @@ sleep 5 echo "starting HBase Worker" cp /root/hbase_worker_files/run_hbase_worker.sh / chmod a+rx /run_hbase_worker.sh -sudo -u hdfs /run_hbase_worker.sh +sudo -u hdfs HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION /run_hbase_worker.sh diff --git a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh index a5f651e..7ff96ac 100644 --- 
a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh +++ b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh @@ -1,5 +1,5 @@ #!/bin/bash -. /opt/hbase-0.94.18/conf/hbase-env.sh +#. /opt/hbase-0.94.18/conf/hbase-env.sh # ${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.worker.Worker $MASTER "${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start regionserver From 6560db8d13953453391ae2fb4eda89ed5531816e Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 30 Apr 2014 09:14:08 -0400 Subject: [PATCH 12/97] Expose Zookeeper port 2181 --- deploy/start_hbase_cluster.sh | 1 + hbase-0.94.18/hbase-base/Dockerfile | 3 +++ 2 files changed, 4 insertions(+) diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index 68deb82..d6604c8 100644 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -87,6 +87,7 @@ function wait_for_master { #else # query_string="MasterWebUI: Started Master web UI" #fi + # can use "INFO HBaseServer: Starting" echo -n "waiting for master " #sudo docker logs $MASTER | grep "$query_string" > /dev/null #until [ "$?" 
-eq 0 ]; do diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index 33d8cf2..3232e35 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -41,6 +41,9 @@ ADD files/zoo.cfg $HBASE_HOME/conf/zoo.cfg # Ports reference # http://blog.cloudera.com/blog/2013/07/guide-to-using-apache-hbase-ports/ +# Expose zookeeper +EXPOSE 2181 + # Expose HBase Master Ports EXPOSE 60000 60010 From f9dc87e26c7889e106d1226aca6118cb68386d5e Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 30 Apr 2014 09:16:27 -0400 Subject: [PATCH 13/97] Added comment on Master web UI --- hbase-0.94.18/hbase-base/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index 3232e35..58f5b40 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -45,6 +45,7 @@ ADD files/zoo.cfg $HBASE_HOME/conf/zoo.cfg EXPOSE 2181 # Expose HBase Master Ports +# HBase Master web UI at :60010/master-status; ZK at :60010/zk.jsp EXPOSE 60000 60010 # Expose HBase Region Server Ports From 552426d64ab8eba51f2e4af2593ab17ad566f6e1 Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 30 Apr 2014 09:42:21 -0400 Subject: [PATCH 14/97] Updated wait_for_master loop --- deploy/start_hbase_cluster.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index d6604c8..7e8d3e5 100644 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -87,15 +87,15 @@ function wait_for_master { #else # query_string="MasterWebUI: Started Master web UI" #fi - # can use "INFO HBaseServer: Starting" + query_string="INFO org.apache.hadoop.http.HttpServer" echo -n "waiting for master " - #sudo docker logs $MASTER | grep "$query_string" > /dev/null - #until [ "$?" -eq 0 ]; do - #echo -n "." 
- #sleep 1 - #sudo docker logs $MASTER | grep "$query_string" > /dev/null; - #done - sleep 15 + sudo docker logs $MASTER | grep "$query_string" > /dev/null + until [ "$?" -eq 0 ]; do + echo -n "." + sleep 1 + sudo docker logs $MASTER | grep "$query_string" > /dev/null; + done + #sleep 15 echo "" echo -n "waiting for nameserver to find master " check_hostname result master "$MASTER_IP" From bea12de511ea89caea3a880d8eb8546173d19722 Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 30 Apr 2014 10:27:17 -0400 Subject: [PATCH 15/97] Pass $IP variable to run_hbase_master.sh --- hbase-0.94.18/hbase-master/files/default_cmd | 2 +- .../hbase-master/files/run_hbase_master.sh | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/hbase-0.94.18/hbase-master/files/default_cmd b/hbase-0.94.18/hbase-master/files/default_cmd index f89157b..25ae2c3 100644 --- a/hbase-0.94.18/hbase-master/files/default_cmd +++ b/hbase-0.94.18/hbase-master/files/default_cmd @@ -22,4 +22,4 @@ sleep 5 echo "starting Hbase Master" cp /root/hbase_master_files/run_hbase_master.sh / chmod a+rx /run_hbase_master.sh -sudo -u hdfs HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION /run_hbase_master.sh +sudo -u hdfs HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION IP=$IP /run_hbase_master.sh diff --git a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh index 6ce7388..5813761 100644 --- a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh +++ b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh @@ -1,11 +1,24 @@ #!/bin/bash -#/opt/spark-0.9.0/sbin/start-master.sh +rm "${HBASE_HOME}/logs"/* + +echo -n "starting zookeeper" "${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start zookeeper -"${HBASE_HOME}/bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master +#Simple test to see if zookeeper started +while [ 1 ]; +do + if [ -f 
"${HBASE_HOME}/logs"/*zookeeper*.log ] + then + break + else + sleep 1 + fi +done +echo -n "starting HBase master" +"${HBASE_HOME}/bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master while [ 1 ]; do - tail -f /opt/hbase-${HBASE_VERSION}/logs/*.out + tail -f "${HBASE_HOME}/logs"/*.out sleep 1 done From b00b1069586d74ecb5fdde7e6b09fc99ae039bac Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 30 Apr 2014 12:13:52 -0400 Subject: [PATCH 16/97] Back to just sleep to wait for zookeeper to start --- hbase-0.94.18/hbase-master/files/run_hbase_master.sh | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh index 5813761..3e94783 100644 --- a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh +++ b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh @@ -4,16 +4,8 @@ rm "${HBASE_HOME}/logs"/* echo -n "starting zookeeper" "${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start zookeeper -#Simple test to see if zookeeper started -while [ 1 ]; -do - if [ -f "${HBASE_HOME}/logs"/*zookeeper*.log ] - then - break - else - sleep 1 - fi -done + +sleep 3 echo -n "starting HBase master" "${HBASE_HOME}/bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master From d5a7e4916b0c85a95a9ffa082dd553d10f3f524c Mon Sep 17 00:00:00 2001 From: htaox Date: Thu, 1 May 2014 10:28:20 -0400 Subject: [PATCH 17/97] Download and install Zookeeper --- apache-hadoop-hdfs-precise/build | 0 build/build_all_hbase.sh | 0 deploy/deploy_hbase.sh | 0 deploy/start_hbase_cluster.sh | 16 ++++++++-------- dnsmasq-precise/build | 0 hbase-0.94.18/build | 0 hbase-0.94.18/hbase-base/Dockerfile | 19 ++++++++++++------- .../hbase-base/files/configure_hbase.sh | 2 +- hbase-0.94.18/hbase-base/files/zoo.cfg | 1 + hbase-0.94.18/hbase-master/files/default_cmd | 2 +- .../hbase-master/files/run_hbase_master.sh | 4 +--- hbase-0.94.18/hbase-worker/files/default_cmd | 
0 .../hbase-worker/files/run_hbase_worker.sh | 0 mesos/mesos-base/build | 0 14 files changed, 24 insertions(+), 20 deletions(-) mode change 100755 => 100644 apache-hadoop-hdfs-precise/build mode change 100644 => 100755 build/build_all_hbase.sh mode change 100644 => 100755 deploy/deploy_hbase.sh mode change 100644 => 100755 deploy/start_hbase_cluster.sh mode change 100755 => 100644 dnsmasq-precise/build mode change 100644 => 100755 hbase-0.94.18/build mode change 100644 => 100755 hbase-0.94.18/hbase-base/files/configure_hbase.sh mode change 100644 => 100755 hbase-0.94.18/hbase-master/files/default_cmd mode change 100644 => 100755 hbase-0.94.18/hbase-master/files/run_hbase_master.sh mode change 100644 => 100755 hbase-0.94.18/hbase-worker/files/default_cmd mode change 100644 => 100755 hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh mode change 100755 => 100644 mesos/mesos-base/build diff --git a/apache-hadoop-hdfs-precise/build b/apache-hadoop-hdfs-precise/build old mode 100755 new mode 100644 diff --git a/build/build_all_hbase.sh b/build/build_all_hbase.sh old mode 100644 new mode 100755 diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh old mode 100644 new mode 100755 diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh old mode 100644 new mode 100755 index 7e8d3e5..34ff852 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -87,15 +87,15 @@ function wait_for_master { #else # query_string="MasterWebUI: Started Master web UI" #fi - query_string="INFO org.apache.hadoop.http.HttpServer" + #query_string="INFO org.apache.hadoop.http.HttpServer" echo -n "waiting for master " - sudo docker logs $MASTER | grep "$query_string" > /dev/null - until [ "$?" -eq 0 ]; do - echo -n "." - sleep 1 - sudo docker logs $MASTER | grep "$query_string" > /dev/null; - done - #sleep 15 + #sudo docker logs $MASTER | grep "$query_string" > /dev/null + #until [ "$?" -eq 0 ]; do + #echo -n "." 
+ #sleep 1 + #sudo docker logs $MASTER | grep "$query_string" > /dev/null; + #done + sleep 5 echo "" echo -n "waiting for nameserver to find master " check_hostname result master "$MASTER_IP" diff --git a/dnsmasq-precise/build b/dnsmasq-precise/build old mode 100755 new mode 100644 diff --git a/hbase-0.94.18/build b/hbase-0.94.18/build old mode 100644 new mode 100755 diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index 58f5b40..ef7a2ea 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -1,9 +1,11 @@ # HBASE 0.94.18 # FROM apache-hadoop-hdfs-precise:1.2.1 - MAINTAINER htaox htaox@hotmail.com +# Intall cURL +RUN apt-get install -y curl + # Download and Install HBase ENV HBASE_VERSION 0.94.18 @@ -24,11 +26,6 @@ ENV HBASE_HOME /opt/hbase ENV HBASE_PREFIX /opt/hbase ENV PATH $PATH:$HBASE_HOME/bin ENV HBASE_CONF_DIR /opt/hbase/conf -#ENV HBASE_SERVER /opt/hbase/bin/hbase - -#ADD ./hbase-site.xml /opt/hbase/conf/hbase-site.xml -#ADD ./zoo.cfg /opt/hbase/conf/zoo.cfg -#ADD ./hbase-server /opt/hbase-server RUN rm $HBASE_HOME/conf/hbase-site.xml ADD files/hbase-site.xml $HBASE_HOME/conf/hbase-site.xml @@ -36,7 +33,15 @@ ADD files/hbase-site.xml $HBASE_HOME/conf/hbase-site.xml RUN rm $HBASE_HOME/conf/hbase-env.sh ADD files/hbase-env.sh $HBASE_HOME/conf/hbase-env.sh -ADD files/zoo.cfg $HBASE_HOME/conf/zoo.cfg +# Download and install Zookeeper 3.4.6 +RUN curl -s http://xenia.sote.hu/ftp/mirrors/www.apache.org/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz | tar -xz -C /usr/local/ +RUN cd /usr/local && ln -s zookeeper-3.4.6 zookeeper +ENV ZOO_HOME /usr/local/zookeeper +ENV PATH $PATH:$ZOO_HOME/bin +#RUN mv $ZOO_HOME/conf/zoo_sample.cfg $ZOO_HOME/conf/zoo.cfg +RUN mkdir /tmp/zookeeper + +ADD files/zoo.cfg $ZOO_HOME/conf/zoo.cfg # Ports reference # http://blog.cloudera.com/blog/2013/07/guide-to-using-apache-hbase-ports/ diff --git a/hbase-0.94.18/hbase-base/files/configure_hbase.sh 
b/hbase-0.94.18/hbase-base/files/configure_hbase.sh old mode 100644 new mode 100755 index ef5ef83..50024e1 --- a/hbase-0.94.18/hbase-base/files/configure_hbase.sh +++ b/hbase-0.94.18/hbase-base/files/configure_hbase.sh @@ -31,7 +31,7 @@ function configure_hbase() { sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/hbase-site.xml - sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/zoo.cfg + sed -i "s/@IP@/$1/g" $ZOO_HOME/conf/zoo.cfg #echo "$1 $(hostname)" >> /etc/hosts } diff --git a/hbase-0.94.18/hbase-base/files/zoo.cfg b/hbase-0.94.18/hbase-base/files/zoo.cfg index 328d1e5..734b2d0 100644 --- a/hbase-0.94.18/hbase-base/files/zoo.cfg +++ b/hbase-0.94.18/hbase-base/files/zoo.cfg @@ -1,3 +1,4 @@ clientPort=2181 clientPortAddress=@IP@ server.1=@IP@:2181 +dataDir=/tmp/zookeeper diff --git a/hbase-0.94.18/hbase-master/files/default_cmd b/hbase-0.94.18/hbase-master/files/default_cmd old mode 100644 new mode 100755 index 25ae2c3..85263d7 --- a/hbase-0.94.18/hbase-master/files/default_cmd +++ b/hbase-0.94.18/hbase-master/files/default_cmd @@ -22,4 +22,4 @@ sleep 5 echo "starting Hbase Master" cp /root/hbase_master_files/run_hbase_master.sh / chmod a+rx /run_hbase_master.sh -sudo -u hdfs HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION IP=$IP /run_hbase_master.sh +sudo -u hdfs ZOO_HOME=$ZOO_HOME HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION IP=$IP /run_hbase_master.sh diff --git a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh old mode 100644 new mode 100755 index 3e94783..1c84a75 --- a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh +++ b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh @@ -1,9 +1,7 @@ #!/bin/bash -rm "${HBASE_HOME}/logs"/* - echo -n "starting zookeeper" -"${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start 
zookeeper +$ZOO_HOME/bin/zkServer.sh start sleep 3 diff --git a/hbase-0.94.18/hbase-worker/files/default_cmd b/hbase-0.94.18/hbase-worker/files/default_cmd old mode 100644 new mode 100755 diff --git a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh old mode 100644 new mode 100755 diff --git a/mesos/mesos-base/build b/mesos/mesos-base/build old mode 100755 new mode 100644 From 45936ead707ccb6814505c85268626e07edea37d Mon Sep 17 00:00:00 2001 From: htaox Date: Thu, 1 May 2014 12:28:20 -0400 Subject: [PATCH 18/97] Update hdfs.hdfs as owner of Zookeeper directories --- hbase-0.94.18/hbase-base/Dockerfile | 6 +++--- hbase-0.94.18/hbase-base/files/configure_hbase.sh | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index ef7a2ea..a2d9aff 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get install -y curl # Download and Install HBase ENV HBASE_VERSION 0.94.18 -#RUN mkdir -p /opt/downloads && cd /opt/downloads && curl -SsfLO "http://www.apache.org/dist/hbase/hbase-$HBASE_VERSION/hbase-$HBASE_VERSION.tar.gz" +#RUN mkdir -p /opt/downloads && cd /opt/downloads && curl -SsfLO "http://www.apache.org/dcd ist/hbase/hbase-$HBASE_VERSION/hbase-$HBASE_VERSION.tar.gz" RUN mkdir -p /opt/downloads ADD http://www.apache.org/dist/hbase/hbase-$HBASE_VERSION/hbase-$HBASE_VERSION.tar.gz /opt/downloads/hbase-$HBASE_VERSION.tar.gz @@ -35,11 +35,11 @@ ADD files/hbase-env.sh $HBASE_HOME/conf/hbase-env.sh # Download and install Zookeeper 3.4.6 RUN curl -s http://xenia.sote.hu/ftp/mirrors/www.apache.org/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz | tar -xz -C /usr/local/ -RUN cd /usr/local && ln -s zookeeper-3.4.6 zookeeper +#RUN cd /usr/local && ln -s zookeeper-3.4.6 zookeeper +RUN ln -s /usr/local/zookeeper-3.4.6 /usr/local/zookeeper ENV ZOO_HOME /usr/local/zookeeper ENV 
PATH $PATH:$ZOO_HOME/bin #RUN mv $ZOO_HOME/conf/zoo_sample.cfg $ZOO_HOME/conf/zoo.cfg -RUN mkdir /tmp/zookeeper ADD files/zoo.cfg $ZOO_HOME/conf/zoo.cfg diff --git a/hbase-0.94.18/hbase-base/files/configure_hbase.sh b/hbase-0.94.18/hbase-base/files/configure_hbase.sh index 50024e1..32d38c6 100755 --- a/hbase-0.94.18/hbase-base/files/configure_hbase.sh +++ b/hbase-0.94.18/hbase-base/files/configure_hbase.sh @@ -16,6 +16,10 @@ function create_hbase_directories() { rm -rf /opt/hbase-$HBASE_VERSION/logs mkdir -p /opt/hbase-$HBASE_VERSION/logs chown hdfs.hdfs /opt/hbase-$HBASE_VERSION/logs + + chown hdfs.hdfs /usr/local/zookeeper + mkdir /tmp/zookeeper + chown hdfs.hdfs /tmp/zookeeper } function deploy_hbase_files() { From 071ef1ba859f8c7dae13933e220a23beb166b178 Mon Sep 17 00:00:00 2001 From: htaox Date: Thu, 1 May 2014 12:51:41 -0400 Subject: [PATCH 19/97] Update Zookeeper log directory --- hbase-0.94.18/hbase-base/Dockerfile | 1 + .../hbase-base/files/configure_hbase.sh | 3 + hbase-0.94.18/hbase-base/files/zkEnv.sh | 116 ++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100755 hbase-0.94.18/hbase-base/files/zkEnv.sh diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index a2d9aff..acad500 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -42,6 +42,7 @@ ENV PATH $PATH:$ZOO_HOME/bin #RUN mv $ZOO_HOME/conf/zoo_sample.cfg $ZOO_HOME/conf/zoo.cfg ADD files/zoo.cfg $ZOO_HOME/conf/zoo.cfg +ADD files/zkEnv.sh $ZOO_HOME/bin/zkEnv.sh # Ports reference # http://blog.cloudera.com/blog/2013/07/guide-to-using-apache-hbase-ports/ diff --git a/hbase-0.94.18/hbase-base/files/configure_hbase.sh b/hbase-0.94.18/hbase-base/files/configure_hbase.sh index 32d38c6..ce95f1a 100755 --- a/hbase-0.94.18/hbase-base/files/configure_hbase.sh +++ b/hbase-0.94.18/hbase-base/files/configure_hbase.sh @@ -20,6 +20,9 @@ function create_hbase_directories() { chown hdfs.hdfs /usr/local/zookeeper mkdir 
/tmp/zookeeper chown hdfs.hdfs /tmp/zookeeper + # Zookeeper logs + sudo mkdir /var/log/zookeeper + sudo chown hdfs.hdfs /var/log/zookeeper } function deploy_hbase_files() { diff --git a/hbase-0.94.18/hbase-base/files/zkEnv.sh b/hbase-0.94.18/hbase-base/files/zkEnv.sh new file mode 100755 index 0000000..726f54a --- /dev/null +++ b/hbase-0.94.18/hbase-base/files/zkEnv.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script should be sourced into other zookeeper +# scripts to setup the env variables + +# We use ZOOCFGDIR if defined, +# otherwise we use /etc/zookeeper +# or the conf directory that is +# a sibling of this script's directory + +ZOO_LOG_DIR=/var/log/zookeeper +ZOOBINDIR="${ZOOBINDIR:-/usr/bin}" +ZOOKEEPER_PREFIX="${ZOOBINDIR}/.." + +if [ "x$ZOOCFGDIR" = "x" ] +then + if [ -e "${ZOOKEEPER_PREFIX}/conf" ]; then + ZOOCFGDIR="$ZOOBINDIR/../conf" + else + ZOOCFGDIR="$ZOOBINDIR/../etc/zookeeper" + fi +fi + +if [ -f "${ZOOCFGDIR}/zookeeper-env.sh" ]; then + . "${ZOOCFGDIR}/zookeeper-env.sh" +fi + +if [ "x$ZOOCFG" = "x" ] +then + ZOOCFG="zoo.cfg" +fi + +ZOOCFG="$ZOOCFGDIR/$ZOOCFG" + +if [ -f "$ZOOCFGDIR/java.env" ] +then + . 
"$ZOOCFGDIR/java.env" +fi + +if [ "x${ZOO_LOG_DIR}" = "x" ] +then + ZOO_LOG_DIR="." +fi + +if [ "x${ZOO_LOG4J_PROP}" = "x" ] +then + ZOO_LOG4J_PROP="INFO,CONSOLE" +fi + +if [ "$JAVA_HOME" != "" ]; then + JAVA="$JAVA_HOME/bin/java" +else + JAVA=java +fi + +#add the zoocfg dir to classpath +CLASSPATH="$ZOOCFGDIR:$CLASSPATH" + +for i in "$ZOOBINDIR"/../src/java/lib/*.jar +do + CLASSPATH="$i:$CLASSPATH" +done + +#make it work in the binary package +#(use array for LIBPATH to account for spaces within wildcard expansion) +if [ -e "${ZOOKEEPER_PREFIX}"/share/zookeeper/zookeeper-*.jar ]; then + LIBPATH=("${ZOOKEEPER_PREFIX}"/share/zookeeper/*.jar) +else + #release tarball format + for i in "$ZOOBINDIR"/../zookeeper-*.jar + do + CLASSPATH="$i:$CLASSPATH" + done + LIBPATH=("${ZOOBINDIR}"/../lib/*.jar) +fi + +for i in "${LIBPATH[@]}" +do + CLASSPATH="$i:$CLASSPATH" +done + +#make it work for developers +for d in "$ZOOBINDIR"/../build/lib/*.jar +do + CLASSPATH="$d:$CLASSPATH" +done + +#make it work for developers +CLASSPATH="$ZOOBINDIR/../build/classes:$CLASSPATH" + +case "`uname`" in + CYGWIN*) cygwin=true ;; + *) cygwin=false ;; +esac + +if $cygwin +then + CLASSPATH=`cygpath -wp "$CLASSPATH"` +fi + +#echo "CLASSPATH=$CLASSPATH" \ No newline at end of file From db5c1904420702264d0ff7933d8f61a1e6a86b95 Mon Sep 17 00:00:00 2001 From: htaox Date: Thu, 1 May 2014 15:49:15 -0400 Subject: [PATCH 20/97] Change startup routine --- deploy/deploy_hbase.sh | 7 ++++--- deploy/start_hbase_cluster.sh | 24 +++++++++++++----------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh index 7a90877..96fdaca 100755 --- a/deploy/deploy_hbase.sh +++ b/deploy/deploy_hbase.sh @@ -95,12 +95,13 @@ start_master ${image_name}-master $image_version wait_for_master if [ "$image_type" == "hbase" ]; then SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$hbase_VERSION -n $NAMESERVER $VOLUME_MAP" -elif [ "$image_type" == "shark" ]; 
then - SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$SHARK_VERSION -n $NAMESERVER $VOLUME_MAP" +#elif [ "$image_type" == "shark" ]; then +# SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$SHARK_VERSION -n $NAMESERVER $VOLUME_MAP" fi start_workers ${image_name}-worker $image_version -get_num_registered_workers +#get_num_registered_workers +NUM_REGISTERED_WORKERS=0 echo -n "waiting for workers to register " until [[ "$NUM_REGISTERED_WORKERS" == "$NUM_WORKERS" ]]; do echo -n "." diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index 34ff852..e1ad8a3 100755 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -67,18 +67,20 @@ function print_cluster_info() { } function get_num_registered_workers() { - if [[ "$SPARK_VERSION" == "0.7.3" ]]; then - DATA=$( curl --noproxy -s http://$MASTER_IP:8080/?format=json | tr -d '\n' | sed s/\"/\\\\\"/g) - else + sleep 5 + NUM_REGISTERED_WORKERS=$(($NUM_REGISTERED_WORKERS+1)) + #if [[ "$SPARK_VERSION" == "0.7.3" ]]; then + # DATA=$( curl --noproxy -s http://$MASTER_IP:8080/?format=json | tr -d '\n' | sed s/\"/\\\\\"/g) + #else # Docker on Mac uses tinycore Linux with busybox which has a limited version wget (?) - echo $(uname -a) | grep "Linux boot2docker" > /dev/null - if [[ "$?" == "0" ]]; then - DATA=$( wget -Y off -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) - else - DATA=$( wget --no-proxy -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) - fi - fi - NUM_REGISTERED_WORKERS=$(python -c "import json; data = \"$DATA\"; value = json.loads(data); print len(value['workers'])") + #echo $(uname -a) | grep "Linux boot2docker" > /dev/null + #if [[ "$?" 
== "0" ]]; then + # DATA=$( wget -Y off -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) + #else + # DATA=$( wget --no-proxy -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) + #fi + #fi + #NUM_REGISTERED_WORKERS=$(python -c "import json; data = \"$DATA\"; value = json.loads(data); print len(value['workers'])") } function wait_for_master { From c96daea2e327cb0ccf1b2b4d93baaf18a5ce9d7b Mon Sep 17 00:00:00 2001 From: htaox Date: Thu, 1 May 2014 16:16:18 -0400 Subject: [PATCH 21/97] Updated worker startup routine --- hbase-0.94.18/hbase-worker/files/default_cmd | 2 +- hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hbase-0.94.18/hbase-worker/files/default_cmd b/hbase-0.94.18/hbase-worker/files/default_cmd index 24bdf35..ff84bd1 100755 --- a/hbase-0.94.18/hbase-worker/files/default_cmd +++ b/hbase-0.94.18/hbase-worker/files/default_cmd @@ -6,7 +6,7 @@ IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { p echo "WORKER_IP=$IP" echo "preparing HBase" -prepare_hbase $1 +prepare_hbase $IP echo "starting Hadoop Datanode" service hadoop-datanode start diff --git a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh index 7ff96ac..31979bd 100755 --- a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh +++ b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh @@ -3,3 +3,8 @@ # ${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.worker.Worker $MASTER "${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start regionserver +while [ 1 ]; +do + tail -f "${HBASE_HOME}/logs"/*.out + sleep 1 +done \ No newline at end of file From e310f1069a254faad703c7abf9c31289c513590f Mon Sep 17 00:00:00 2001 From: htaox Date: Fri, 2 May 2014 12:21:50 -0400 Subject: [PATCH 22/97] Delay startup of Hbase --- hbase-0.94.18/hbase-base/files/configure_hbase.sh | 6 +----- 
hbase-0.94.18/hbase-master/files/default_cmd | 15 +++++++++++++-- .../hbase-master/files/run_hbase_master.sh | 5 ----- hbase-0.94.18/hbase-worker/files/default_cmd | 9 ++++++++- .../hbase-worker/files/run_hbase_worker.sh | 7 ------- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/hbase-0.94.18/hbase-base/files/configure_hbase.sh b/hbase-0.94.18/hbase-base/files/configure_hbase.sh index ce95f1a..6e2bf40 100755 --- a/hbase-0.94.18/hbase-base/files/configure_hbase.sh +++ b/hbase-0.94.18/hbase-base/files/configure_hbase.sh @@ -35,11 +35,7 @@ function configure_hbase() { configure_hadoop $1 sed -i s/__MASTER__/master/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh sed -i s/__HBASE_HOME__/"\/opt\/hbase-${HBASE_VERSION}"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh - sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh - - sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/hbase-site.xml - sed -i "s/@IP@/$1/g" $ZOO_HOME/conf/zoo.cfg - #echo "$1 $(hostname)" >> /etc/hosts + sed -i s/__JAVA_HOME__/"\/usr\/lib\/jvm\/java-7-openjdk-amd64"/ /opt/hbase-$HBASE_VERSION/conf/hbase-env.sh } function prepare_hbase() { diff --git a/hbase-0.94.18/hbase-master/files/default_cmd b/hbase-0.94.18/hbase-master/files/default_cmd index 85263d7..ef7704b 100755 --- a/hbase-0.94.18/hbase-master/files/default_cmd +++ b/hbase-0.94.18/hbase-master/files/default_cmd @@ -17,9 +17,20 @@ service hadoop-namenode start > /dev/null 2>&1 echo "starting sshd" /usr/sbin/sshd -sleep 5 +#sleep 5 + +sed -i "s/@IP@/$IP/g" $HBASE_HOME/conf/hbase-site.xml +sed -i "s/@IP@/$IP/g" $ZOO_HOME/conf/zoo.cfg echo "starting Hbase Master" cp /root/hbase_master_files/run_hbase_master.sh / chmod a+rx /run_hbase_master.sh -sudo -u hdfs ZOO_HOME=$ZOO_HOME HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION IP=$IP /run_hbase_master.sh + +while [ 1 ]; +do + tail -f "${HBASE_HOME}/logs"/*.out + sleep 1 +done + +# Don't start Hbase yet. 
Need to wait for the datanodes to come up. +#sudo -u hdfs ZOO_HOME=$ZOO_HOME HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION IP=$IP /run_hbase_master.sh diff --git a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh index 1c84a75..dfdcffd 100755 --- a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh +++ b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh @@ -7,8 +7,3 @@ sleep 3 echo -n "starting HBase master" "${HBASE_HOME}/bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master -while [ 1 ]; -do - tail -f "${HBASE_HOME}/logs"/*.out - sleep 1 -done diff --git a/hbase-0.94.18/hbase-worker/files/default_cmd b/hbase-0.94.18/hbase-worker/files/default_cmd index ff84bd1..e09cd9d 100755 --- a/hbase-0.94.18/hbase-worker/files/default_cmd +++ b/hbase-0.94.18/hbase-worker/files/default_cmd @@ -19,4 +19,11 @@ sleep 5 echo "starting HBase Worker" cp /root/hbase_worker_files/run_hbase_worker.sh / chmod a+rx /run_hbase_worker.sh -sudo -u hdfs HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION /run_hbase_worker.sh + +while [ 1 ]; +do + tail -f "${HBASE_HOME}/logs"/*.out + sleep 1 +done + +#sudo -u hdfs HBASE_HOME=$HBASE_HOME HBASE_CONF_DIR=$HBASE_CONF_DIR HBASE_VERSION=$HBASE_VERSION /run_hbase_worker.sh diff --git a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh index 31979bd..f34e5fd 100755 --- a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh +++ b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh @@ -1,10 +1,3 @@ #!/bin/bash -#. 
/opt/hbase-0.94.18/conf/hbase-env.sh -# ${SPARK_HOME}/bin/spark-class org.apache.spark.deploy.worker.Worker $MASTER "${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start regionserver -while [ 1 ]; -do - tail -f "${HBASE_HOME}/logs"/*.out - sleep 1 -done \ No newline at end of file From f1303877681481f234c993a67fd43b856fd088a3 Mon Sep 17 00:00:00 2001 From: htaox Date: Fri, 2 May 2014 16:42:44 -0400 Subject: [PATCH 23/97] Start datanodes and regionservers in sequence --- deploy/deploy_hbase.sh | 5 +- deploy/start_hbase_cluster.sh | 64 +++++++++++-------- .../hbase-master/files/run_hbase_master.sh | 9 --- hbase-0.94.18/hbase-worker/files/default_cmd | 8 ++- .../hbase-worker/files/run_hbase_worker.sh | 3 - 5 files changed, 45 insertions(+), 44 deletions(-) delete mode 100755 hbase-0.94.18/hbase-master/files/run_hbase_master.sh delete mode 100755 hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh index 96fdaca..b12321c 100755 --- a/deploy/deploy_hbase.sh +++ b/deploy/deploy_hbase.sh @@ -95,8 +95,6 @@ start_master ${image_name}-master $image_version wait_for_master if [ "$image_type" == "hbase" ]; then SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$hbase_VERSION -n $NAMESERVER $VOLUME_MAP" -#elif [ "$image_type" == "shark" ]; then -# SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$SHARK_VERSION -n $NAMESERVER $VOLUME_MAP" fi start_workers ${image_name}-worker $image_version @@ -114,3 +112,6 @@ if [[ "$start_shell" -eq 1 ]]; then SHELL_ID=$($SHELLCOMMAND | tail -n 1 | awk '{print $4}') sudo docker attach $SHELL_ID fi + +#After all the servers are up, we can start the services in sequence +start_hbase diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index e1ad8a3..d790816 100755 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -3,6 +3,8 @@ MASTER=-1 MASTER_IP= NUM_REGISTERED_WORKERS=0 +BASEDIR=$(cd 
$(dirname $0); pwd) +REGIONSERVERS="${BASEDIR}/regionservers" # starts the Spark/Shark master container function start_master() { @@ -43,6 +45,8 @@ function start_workers() { sleep 3 WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE + echo "WORKER #${i} IP: $WORKER_IP" + echo $WORKER_IP >> $REGIONSERVERS done } @@ -53,7 +57,6 @@ function print_cluster_info() { echo "***********************************************************************" echo "start shell via: $1" echo "" - echo "visit Spark WebUI at: http://$MASTER_IP:8080/" echo "visit Hadoop Namenode at: http://$MASTER_IP:50070" echo "ssh into master via: ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}" echo "" @@ -67,36 +70,12 @@ function print_cluster_info() { } function get_num_registered_workers() { - sleep 5 - NUM_REGISTERED_WORKERS=$(($NUM_REGISTERED_WORKERS+1)) - #if [[ "$SPARK_VERSION" == "0.7.3" ]]; then - # DATA=$( curl --noproxy -s http://$MASTER_IP:8080/?format=json | tr -d '\n' | sed s/\"/\\\\\"/g) - #else - # Docker on Mac uses tinycore Linux with busybox which has a limited version wget (?) - #echo $(uname -a) | grep "Linux boot2docker" > /dev/null - #if [[ "$?" 
== "0" ]]; then - # DATA=$( wget -Y off -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) - #else - # DATA=$( wget --no-proxy -q -O - http://$MASTER_IP:8080/json | tr -d '\n' | sed s/\"/\\\\\"/g) - #fi - #fi - #NUM_REGISTERED_WORKERS=$(python -c "import json; data = \"$DATA\"; value = json.loads(data); print len(value['workers'])") + sleep 3 + NUM_REGISTERED_WORKERS=$(($NUM_REGISTERED_WORKERS+1)) } function wait_for_master { - #if [[ "$SPARK_VERSION" == "0.7.3" ]]; then - # query_string="INFO HttpServer: akka://sparkMaster/user/HttpServer started" - #else - # query_string="MasterWebUI: Started Master web UI" - #fi - #query_string="INFO org.apache.hadoop.http.HttpServer" echo -n "waiting for master " - #sudo docker logs $MASTER | grep "$query_string" > /dev/null - #until [ "$?" -eq 0 ]; do - #echo -n "." - #sleep 1 - #sudo docker logs $MASTER | grep "$query_string" > /dev/null; - #done sleep 5 echo "" echo -n "waiting for nameserver to find master " @@ -109,3 +88,34 @@ function wait_for_master { echo "" sleep 3 } + +function start_hbase { + + echo -n "updating regionservers file" + scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=$BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa $REGIONSERVERS root@$MASTER_IP:/opt/hbase/conf/ + + echo -n "change regionservers file permission" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "chown hdfs.hdfs /opt/hbase/conf/regionservers" + + #update the core-site.xml and hbase-site.xml and start hadoop datanodes + while read WORKERADDRESS + do + echo -n "updating core-site.xml on ${WORKERADDRESS}" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa etc/hadoop/core-site.xml root@${WORKERADDRESS}:/etc/hadoop/" + echo -n 
"updating hbase-site.xml on ${WORKERADDRESS}" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /opt/hbase/conf/hbase-site.xml root@${WORKERADDRESS}:/opt/hbase/conf/" + echo -n "starting datanode on ${WORKERADDRESS}" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-datanode start" + + done < $REGIONSERVERS + + echo -n "starting zookeeper on ${MASTER_IP}" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "/usr/local/zookeeper/bin/zkServer.sh start" + + echo -n "starting hbase master on ${MASTER_IP}" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "sudo -u hdfs /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf start master" + + echo -n "starting all hbase regionservers" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "sudo -u hdfs /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf --hosts /opt/hbase/conf/regionservers start regionserver" + +} diff --git a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh b/hbase-0.94.18/hbase-master/files/run_hbase_master.sh deleted file mode 100755 index dfdcffd..0000000 --- a/hbase-0.94.18/hbase-master/files/run_hbase_master.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -echo -n "starting zookeeper" -$ZOO_HOME/bin/zkServer.sh start - -sleep 3 - -echo -n "starting HBase master" -"${HBASE_HOME}/bin"/hbase-daemon.sh --config "${HBASE_CONF_DIR}" start master diff --git a/hbase-0.94.18/hbase-worker/files/default_cmd b/hbase-0.94.18/hbase-worker/files/default_cmd index 
e09cd9d..130ecfd 100755 --- a/hbase-0.94.18/hbase-worker/files/default_cmd +++ b/hbase-0.94.18/hbase-worker/files/default_cmd @@ -8,15 +8,17 @@ echo "WORKER_IP=$IP" echo "preparing HBase" prepare_hbase $IP -echo "starting Hadoop Datanode" -service hadoop-datanode start +#echo "starting Hadoop Datanode" +#service hadoop-datanode start + +#Don't start Hadoop yet, need to change core-site.xml from master first echo "starting sshd" /usr/sbin/sshd sleep 5 -echo "starting HBase Worker" +#echo "starting HBase Worker" cp /root/hbase_worker_files/run_hbase_worker.sh / chmod a+rx /run_hbase_worker.sh diff --git a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh b/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh deleted file mode 100755 index f34e5fd..0000000 --- a/hbase-0.94.18/hbase-worker/files/run_hbase_worker.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -"${HBASE_HOME}/bin"/hbase-daemons.sh --config "${HBASE_CONF_DIR}" start regionserver From 52bb57901bfaea854b2a63bf6fccbde0e2c2eefb Mon Sep 17 00:00:00 2001 From: htaox Date: Sat, 3 May 2014 18:53:43 +0100 Subject: [PATCH 24/97] Fix core-site.xml location --- deploy/start_hbase_cluster.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index d790816..838e9d7 100755 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -91,6 +91,8 @@ function wait_for_master { function start_hbase { + chmod 400 $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa + echo -n "updating regionservers file" scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=$BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa $REGIONSERVERS root@$MASTER_IP:/opt/hbase/conf/ @@ -101,12 +103,14 @@ function start_hbase { while read WORKERADDRESS do echo -n "updating core-site.xml on ${WORKERADDRESS}" - ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no 
root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa etc/hadoop/core-site.xml root@${WORKERADDRESS}:/etc/hadoop/" + ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /etc/hadoop/core-site.xml root@${WORKERADDRESS}:/etc/hadoop/" echo -n "updating hbase-site.xml on ${WORKERADDRESS}" ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /opt/hbase/conf/hbase-site.xml root@${WORKERADDRESS}:/opt/hbase/conf/" echo -n "starting datanode on ${WORKERADDRESS}" ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-datanode start" + sleep 2 + done < $REGIONSERVERS echo -n "starting zookeeper on ${MASTER_IP}" From e66e2bf851c5863aff4e151f9851b0ef35738e1d Mon Sep 17 00:00:00 2001 From: htaox Date: Sat, 3 May 2014 23:08:29 +0100 Subject: [PATCH 25/97] Remove unnecessary function calls --- deploy/deploy_hbase.sh | 18 +------- deploy/start_hbase_cluster.sh | 42 ++++++++++++------- hbase-0.94.18/hbase-base/files/hbase-site.xml | 9 +++- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh index b12321c..bb4bdba 100755 --- a/deploy/deploy_hbase.sh +++ b/deploy/deploy_hbase.sh @@ -93,25 +93,9 @@ start_nameserver $NAMESERVER_IMAGE wait_for_nameserver start_master ${image_name}-master $image_version wait_for_master -if [ "$image_type" == "hbase" ]; then - SHELLCOMMAND="sudo $BASEDIR/start_shell.sh -i ${image_name}-shell:$hbase_VERSION -n $NAMESERVER $VOLUME_MAP" -fi start_workers ${image_name}-worker 
$image_version -#get_num_registered_workers -NUM_REGISTERED_WORKERS=0 -echo -n "waiting for workers to register " -until [[ "$NUM_REGISTERED_WORKERS" == "$NUM_WORKERS" ]]; do - echo -n "." - sleep 1 - get_num_registered_workers -done echo "" -print_cluster_info "$SHELLCOMMAND" -if [[ "$start_shell" -eq 1 ]]; then - SHELL_ID=$($SHELLCOMMAND | tail -n 1 | awk '{print $4}') - sudo docker attach $SHELL_ID -fi - +print_cluster_info #After all the servers are up, we can start the services in sequence start_hbase diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index 838e9d7..1f0d49a 100755 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -28,6 +28,9 @@ function start_master() { # starts a number of Spark/Shark workers function start_workers() { + + rm -f $REGIONSERVERS + for i in `seq 1 $NUM_WORKERS`; do echo "starting worker container" hostname="worker${i}${DOMAINNAME}" @@ -55,7 +58,6 @@ function print_cluster_info() { BASEDIR=$(cd $(dirname $0); pwd)"/.." 
echo "" echo "***********************************************************************" - echo "start shell via: $1" echo "" echo "visit Hadoop Namenode at: http://$MASTER_IP:50070" echo "ssh into master via: ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}" @@ -70,13 +72,13 @@ function print_cluster_info() { } function get_num_registered_workers() { - sleep 3 + sleep 2 NUM_REGISTERED_WORKERS=$(($NUM_REGISTERED_WORKERS+1)) } function wait_for_master { echo -n "waiting for master " - sleep 5 + sleep 1 echo "" echo -n "waiting for nameserver to find master " check_hostname result master "$MASTER_IP" @@ -86,7 +88,7 @@ function wait_for_master { check_hostname result master "$MASTER_IP" done echo "" - sleep 3 + sleep 2 } function start_hbase { @@ -102,24 +104,32 @@ function start_hbase { #update the core-site.xml and hbase-site.xml and start hadoop datanodes while read WORKERADDRESS do - echo -n "updating core-site.xml on ${WORKERADDRESS}" - ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /etc/hadoop/core-site.xml root@${WORKERADDRESS}:/etc/hadoop/" - echo -n "updating hbase-site.xml on ${WORKERADDRESS}" - ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /opt/hbase/conf/hbase-site.xml root@${WORKERADDRESS}:/opt/hbase/conf/" - echo -n "starting datanode on ${WORKERADDRESS}" - ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-datanode start" + echo "updating core-site.xml on ${WORKERADDRESS}" + ssh -n -i 
$BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /etc/hadoop/core-site.xml root@${WORKERADDRESS}:/etc/hadoop/" + + echo "starting datanode on ${WORKERADDRESS}" + ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-datanode start" sleep 2 done < $REGIONSERVERS - echo -n "starting zookeeper on ${MASTER_IP}" - ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "/usr/local/zookeeper/bin/zkServer.sh start" + echo "starting zookeeper on ${MASTER_IP}" + ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "/usr/local/zookeeper/bin/zkServer.sh start" - echo -n "starting hbase master on ${MASTER_IP}" - ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "sudo -u hdfs /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf start master" + echo "starting hbase master on ${MASTER_IP}" + ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "sudo -u hdfs /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf start master" - echo -n "starting all hbase regionservers" - ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "sudo -u hdfs /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf --hosts /opt/hbase/conf/regionservers start regionserver" + #update the hbase-site.xml and hbase-site.xml and start hbase regionservers + while read WORKERADDRESS + do + + echo "updating hbase-site.xml on 
${WORKERADDRESS}" + ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /opt/hbase/conf/hbase-site.xml root@${WORKERADDRESS}:/opt/hbase/conf/" + echo "starting hbase regionserver on ${WORKERADDRESS}" + ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "sudo -u hdfs /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf start regionserver" + + sleep 2 + done < $REGIONSERVERS } diff --git a/hbase-0.94.18/hbase-base/files/hbase-site.xml b/hbase-0.94.18/hbase-base/files/hbase-site.xml index a0a1b46..4ee628c 100644 --- a/hbase-0.94.18/hbase-base/files/hbase-site.xml +++ b/hbase-0.94.18/hbase-base/files/hbase-site.xml @@ -1,6 +1,13 @@ - + +hbase.master +@IP@:60000 +The host and port that the HBase master runs at. + A value of 'local' runs the master and a regionserver + in a single process. + + hbase.zookeeper.quorum @IP@ From 18303e20af1c860f8de549cd35c0dba5d04bb5c4 Mon Sep 17 00:00:00 2001 From: htaox Date: Sat, 3 May 2014 23:32:13 +0100 Subject: [PATCH 26/97] Remove unnecessary comments --- hbase-0.94.18/hbase-base/files/hbase-site.xml | 98 ++++++------------- hbase-0.94.18/hbase-worker/Dockerfile | 6 +- 2 files changed, 32 insertions(+), 72 deletions(-) diff --git a/hbase-0.94.18/hbase-base/files/hbase-site.xml b/hbase-0.94.18/hbase-base/files/hbase-site.xml index 4ee628c..a8c1c74 100644 --- a/hbase-0.94.18/hbase-base/files/hbase-site.xml +++ b/hbase-0.94.18/hbase-base/files/hbase-site.xml @@ -1,67 +1,31 @@ - - - -hbase.master -@IP@:60000 -The host and port that the HBase master runs at. - A value of 'local' runs the master and a regionserver - in a single process. 
- - - - hbase.zookeeper.quorum - @IP@ - - - hbase.rootdir - hdfs://@IP@:9000/hbase - - - hbase.cluster.distributed - true - - - - hbase.master.dns.interface - eth0 - - - - - - - - - hbase.regionserver.dns.interface - eth0 - - - - - - - hbase.zookeeper.dns.interface - eth0 - - - + + + + hbase.master + @IP@:60000 + + + hbase.zookeeper.quorum + @IP@ + + + hbase.rootdir + hdfs://@IP@:9000/hbase + + + hbase.cluster.distributed + true + + + hbase.master.dns.interface + eth0 + + + hbase.regionserver.dns.interface + eth0 + + + hbase.zookeeper.dns.interface + eth0 + + \ No newline at end of file diff --git a/hbase-0.94.18/hbase-worker/Dockerfile b/hbase-0.94.18/hbase-worker/Dockerfile index e746850..d13c5c0 100644 --- a/hbase-0.94.18/hbase-worker/Dockerfile +++ b/hbase-0.94.18/hbase-worker/Dockerfile @@ -1,11 +1,7 @@ -# Spark +#HBase FROM htaox/hbase-base:0.94.18 MAINTAINER htaox htaox@hotmail.com -# Instead of using a random port, bind the worker to a specific port -#ENV SPARK_WORKER_PORT 8888 -#EXPOSE 8888 - ADD files /root/hbase_worker_files RUN chmod 700 /root/hbase_worker_files/default_cmd # Add the entrypoint script for the master From 79d9e7a75e41809c3c1fd1534ec9abcbc6d62331 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 4 May 2014 00:14:50 +0100 Subject: [PATCH 27/97] Add README-hbase.md --- README-hbase.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 7 +++++++ 2 files changed, 55 insertions(+) create mode 100644 README-hbase.md diff --git a/README-hbase.md b/README-hbase.md new file mode 100644 index 0000000..735dad6 --- /dev/null +++ b/README-hbase.md @@ -0,0 +1,48 @@ +#### Deploy the HBase fully-distributed cluster + +
+$ NUMBER_OF_REGIONSERVERS=3
+$ sudo deploy/deploy_hbase.sh -i htaox/hbase:0.94.18 -w $NUMBER_OF_REGIONSERVERS
+
+ +This will (typically) result in the following setup: + +NAMESERVER 10.1.0.3 +HADOOP NAMENODE 10.1.0.4 +HBASE MASTER 10.1.0.4 +ZOOKEEPER 10.1.0.4 +HADOOP DATANODE 10.1.0.5 +HBASE REGIONSERVER 10.1.0.5 +HADOOP DATANODE 10.1.0.6 +HBASE REGIONSERVER 10.1.0.6 +HADOOP DATANODE 10.1.0.7 +HBASE REGIONSERVER 10.1.0.7 + +#### Kill the HBase cluster + +
+$ sudo deploy/kill_all.sh hbase
+$ sudo deploy/kill_all.sh nameserver
+
+ +#### After HBase cluster is killed, cleanup +
+$ sudo docker rm `sudo docker ps -a -q`
+$ sudo docker images | grep "" | awk '{print $3}' | xargs sudo docker rmi
+
+ +#### Build locally + +__Download the scripts__ + $ git clone -b add-hbase https://github.com/htaox/docker-scripts.git + +__Change file permissions__ + $ cd ~/docker-scripts + $ chmod a+x build/build_all_hbase.sh + $ chmod a+x hbase-0.94.18/build + $ chmod a+x deploy/deploy_hbase.sh + +__Build__ + $ sudo build/build_all_hbase.sh + + diff --git a/README.md b/README.md index 3d2fc61..e4a21a2 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ +# Dockerfiles for fully-distributed HBase setup + +This project was based on a fork from amplab / docker-scripts. +DNS and cluster setup are all credited to the great work of the amplab team. +The rest of this file is the original README.md. +For building & running HBase on top of a Hadoop cluster, please check out README-hbase.md + # Dockerfiles for Spark and Shark ## Contents From 958dd35b6aabdc5d1d883570e219f166d526569a Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 4 May 2014 00:16:49 +0100 Subject: [PATCH 28/97] Tidy up --- README-hbase.md | 2 ++ README.md | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README-hbase.md b/README-hbase.md index 735dad6..33eedcd 100644 --- a/README-hbase.md +++ b/README-hbase.md @@ -7,6 +7,7 @@ $ sudo deploy/deploy_hbase.sh -i htaox/hbase:0.94.18 -w $NUMBER_OF_REGIONSERVERS This will (typically) result in the following setup: +
 NAMESERVER         10.1.0.3
 HADOOP NAMENODE    10.1.0.4
 HBASE MASTER       10.1.0.4
@@ -17,6 +18,7 @@ HADOOP DATANODE    10.1.0.6
 HBASE REGIONSERVER 10.1.0.6
 HADOOP DATANODE    10.1.0.7
 HBASE REGIONSERVER 10.1.0.7
+
#### Kill the HBase cluster diff --git a/README.md b/README.md index e4a21a2..1e12a42 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Dockerfiles for fully-distributed HBase setup +#### Dockerfiles for fully-distributed HBase setup This project was based on a fork from amplab / docker-scripts. DNS and cluster setup are all credited to the great work of the amplab team. From 5ebb859179857b4597b2bbf62ae4f9c94067167c Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 4 May 2014 00:19:39 +0100 Subject: [PATCH 29/97] More tidy up --- README-hbase.md | 19 ++++++++++++------- README.md | 3 ++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README-hbase.md b/README-hbase.md index 33eedcd..10cf74e 100644 --- a/README-hbase.md +++ b/README-hbase.md @@ -36,15 +36,20 @@ $ sudo docker images | grep "" | awk '{print $3}' | xargs sudo docker rmi #### Build locally __Download the scripts__ - $ git clone -b add-hbase https://github.com/htaox/docker-scripts.git +
+$ git clone -b add-hbase https://github.com/htaox/docker-scripts.git
+
__Change file permissions__ - $ cd ~/docker-scripts - $ chmod a+x build/build_all_hbase.sh - $ chmod a+x hbase-0.94.18/build - $ chmod a+x deploy/deploy_hbase.sh +
    
+$ cd ~/docker-scripts
+$ chmod a+x build/build_all_hbase.sh
+$ chmod a+x hbase-0.94.18/build
+$ chmod a+x deploy/deploy_hbase.sh
+
__Build__ - $ sudo build/build_all_hbase.sh - +
    
+$ sudo build/build_all_hbase.sh
+
diff --git a/README.md b/README.md index 1e12a42..aec5417 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ -#### Dockerfiles for fully-distributed HBase setup +### Dockerfiles for fully-distributed HBase setup +--- This project was based on a fork from amplab / docker-scripts. DNS and cluster setup are all credited to the great work of the amplab team. From 88d7287dca296efa4f1952895d70d4fdf3d9424d Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 4 May 2014 00:22:15 +0100 Subject: [PATCH 30/97] Add link to README-hbase.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index aec5417..c6d0d8a 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,9 @@ ### Dockerfiles for fully-distributed HBase setup --- - This project was based on a fork from amplab / docker-scripts. DNS and cluster setup are all credited to the great work of the amplab team. The rest of this file is the original README.md. -For building & running HBase on top of a Hadoop cluster, please check out README-hbase.md +For building & running HBase on top of a Hadoop cluster, please check out [README-hbase.md](https://github.com/htaox/docker-scripts/blob/add-hbase/README-hbase.md) # Dockerfiles for Spark and Shark From 9b259874a109a28ea4d047d7f39056008e338579 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 6 May 2014 15:48:33 -0400 Subject: [PATCH 31/97] Initial push Elasticsearch cluster --- build/build_all_elasticsearch.sh | 21 + deploy/deploy_elasticsearch.sh | 97 +++++ deploy/start_elasticsearch_cluster.sh | 103 +++++ elasticsearch-0.90.13/build | 13 + .../elasticsearch-base/Dockerfile | 43 ++ .../elasticsearch-base/build | 4 + .../files/elasticsearch.yml | 377 ++++++++++++++++++ .../elasticsearch-base/files/logging.yml | 56 +++ 8 files changed, 714 insertions(+) create mode 100755 build/build_all_elasticsearch.sh create mode 100755 deploy/deploy_elasticsearch.sh create mode 100755 deploy/start_elasticsearch_cluster.sh create mode 
100755 elasticsearch-0.90.13/build create mode 100755 elasticsearch-0.90.13/elasticsearch-base/Dockerfile create mode 100755 elasticsearch-0.90.13/elasticsearch-base/build create mode 100755 elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml create mode 100755 elasticsearch-0.90.13/elasticsearch-base/files/logging.yml diff --git a/build/build_all_elasticsearch.sh b/build/build_all_elasticsearch.sh new file mode 100755 index 0000000..a2b477f --- /dev/null +++ b/build/build_all_elasticsearch.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +if [[ "$USER" != "root" ]]; then + echo "please run as: sudo $0" + exit 1 +fi + +CURDIR=$(pwd) +BASEDIR=$(cd $(dirname $0); pwd)"/.." +dir_list=( "elasticsearch-0.90.13" ) + +export IMAGE_PREFIX="htaox/" + +# NOTE: the order matters but this is the right one +for i in ${dir_list[@]}; do + echo building $i; + cd ${BASEDIR}/$i + cat build + ./build +done +cd $CURDIR diff --git a/deploy/deploy_elasticsearch.sh b/deploy/deploy_elasticsearch.sh new file mode 100755 index 0000000..bbfc379 --- /dev/null +++ b/deploy/deploy_elasticsearch.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +DEBUG=0 +BASEDIR=$(cd $(dirname $0); pwd) + +elasticsearch_images=( "htaox/elasticsearch:0.90.13") +NAMESERVER_IMAGE="amplab/dnsmasq-precise" + +start_shell=0 +VOLUME_MAP="" + +image_type="?" +image_version="?" +NUM_WORKERS=2 + +source $BASEDIR/start_nameserver.sh +source $BASEDIR/start_elasticsearch_cluster.sh + +function check_root() { + if [[ "$USER" != "root" ]]; then + echo "please run as: sudo $0" + exit 1 + fi +} + +function print_help() { + echo "usage: $0 -i [-w <#workers>] [-v ] [-c]" + echo "" + echo " image: elasticsearch image from:" + echo -n " " + for i in ${elasticsearch_images[@]}; do + echo -n " $i" + done + echo "" +} + +function parse_options() { + while getopts "i:w:cv:h" opt; do + case $opt in + i) + echo "$OPTARG" | grep "elasticsearch:" > /dev/null; + if [ "$?" 
-eq 0 ]; then + image_type="elasticsearch" + fi + image_name=$(echo "$OPTARG" | awk -F ":" '{print $1}') + image_version=$(echo "$OPTARG" | awk -F ":" '{print $2}') + ;; + w) + NUM_WORKERS=$OPTARG + ;; + h) + print_help + exit 0 + ;; + c) + start_shell=1 + ;; + v) + VOLUME_MAP=$OPTARG + ;; + esac + done + + if [ "$image_type" == "?" ]; then + echo "missing or invalid option: -i " + exit 1 + fi + + if [ ! "$VOLUME_MAP" == "" ]; then + echo "data volume chosen: $VOLUME_MAP" + VOLUME_MAP="-v $VOLUME_MAP:/data" + fi +} + +check_root + +if [[ "$#" -eq 0 ]]; then + print_help + exit 1 +fi + +parse_options $@ + +if [ "$image_type" == "elasticsearch" ]; then + ELASTICSEARCH_VERSION="$image_version" + echo "*** Starting elasticsearch $ELASTICSEARCH_VERSION ***" +else + echo "not starting anything" + exit 0 +fi + +start_nameserver $NAMESERVER_IMAGE +wait_for_nameserver + +start_workers ${image_name}-worker $image_version +sleep 3 +echo "" diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh new file mode 100755 index 0000000..9ed8dc7 --- /dev/null +++ b/deploy/start_elasticsearch_cluster.sh @@ -0,0 +1,103 @@ +#!/bin/bash + +MASTER=-1 +MASTER_IP= +NUM_REGISTERED_WORKERS=0 +BASEDIR=$(cd $(dirname $0); pwd) +ELASTICSERVERS="${BASEDIR}/elasticservers" + +# starts the Spark/Shark master container +: <<'END' +function start_master() { + echo "starting master container" + if [ "$DEBUG" -gt 0 ]; then + echo sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2 + fi + MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2) + + if [ "$MASTER" = "" ]; then + echo "error: could not start master container from image $1:$2" + exit 1 + fi + + echo "started master container: $MASTER" + sleep 3 + MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") + echo "MASTER_IP: $MASTER_IP" + echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE +} 
+END + +# starts a number of Spark/Shark workers +function start_workers() { + + rm -f $ELASTICSERVERS + + for i in `seq 1 $NUM_WORKERS`; do + echo "starting worker container" + hostname="worker${i}${DOMAINNAME}" + if [ "$DEBUG" -gt 0 ]; then + echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP} + fi + WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP}) + + if [ "$WORKER" = "" ]; then + echo "error: could not start worker container from image $1:$2" + exit 1 + fi + + echo "started worker container: $WORKER" + sleep 3 + WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") + echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE + echo "WORKER #${i} IP: $WORKER_IP" + echo $WORKER_IP >> $ELASTICSERVERS + echo "WORKER #${i} CLUSTER HEALTH: http://${WORKER_IP}:9200/_plugin/head/" + done +} + +# prints out information on the cluster +: <<'END' +function print_cluster_info() { + BASEDIR=$(cd $(dirname $0); pwd)"/.." 
+ echo "" + echo "***********************************************************************" + echo "" + echo "visit Hadoop Namenode at: http://$MASTER_IP:50070" + echo "ssh into master via: ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}" + echo "" + echo "/data mapped: $VOLUME_MAP" + echo "" + echo "kill master via: sudo docker kill $MASTER" + echo "***********************************************************************" + echo "" + echo "to enable cluster name resolution add the following line to _the top_ of your host's /etc/resolv.conf:" + echo "nameserver $NAMESERVER_IP" +} +END + +: <<'END' +function get_num_registered_workers() { + sleep 2 + NUM_REGISTERED_WORKERS=$(($NUM_REGISTERED_WORKERS+1)) +} +END + +: <<'END' +function wait_for_master { + echo -n "waiting for master " + sleep 1 + echo "" + echo -n "waiting for nameserver to find master " + check_hostname result master "$MASTER_IP" + until [ "$result" -eq 0 ]; do + echo -n "." + sleep 1 + check_hostname result master "$MASTER_IP" + done + echo "" + sleep 2 +} +END + + diff --git a/elasticsearch-0.90.13/build b/elasticsearch-0.90.13/build new file mode 100755 index 0000000..24737df --- /dev/null +++ b/elasticsearch-0.90.13/build @@ -0,0 +1,13 @@ +#!/bin/bash + +elasticsearch_dirs=$(ls -d elasticsearch*) +dir_list=("$elasticsearch_dirs") + +# NOTE: the order matters but this is the right one +for i in ${dir_list[@]}; do + echo building $i; + cd $i; + cat build; + . build; + cd ..; +done diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile new file mode 100755 index 0000000..5c0398b --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -0,0 +1,43 @@ +# +# ElasticSearch Dockerfile +# +# https://github.com/dockerfile/elasticsearch +# + +# Pull base image. +FROM dockerfile/java + +# Install ElasticSearch. 
+RUN cd /tmp && wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.tar.gz +RUN cd /tmp && tar xvzf elasticsearch-0.90.13.tar.gz && rm -f elasticsearch-0.90.13.tar.gz +RUN mv /tmp/elasticsearch-0.90.13 /elasticsearch +ENV ES_HOME /elasticsearch + + +# Install elasticsearch-head +RUN /elasticsearch/bin/plugin -install mobz/elasticsearch-head + +# Define mountable directories. +VOLUME ["/data"] + +# Define working directory. +WORKDIR /data + +# Define default command. +ENTRYPOINT ["/elasticsearch/bin/elasticsearch"] + +# Expose ports. +# - 9200: HTTP +# - 9300: transport +EXPOSE 9200 +EXPOSE 9300 + +ADD files /root/elasticsearch_files + +ADD files/elasticsearch.yml $ES_HOME/conf/elasticsearch.yml + +RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +RUN echo "ES_IP=$IP" +RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml + +RUN $ES_HOME/bin/elasticsearch -f diff --git a/elasticsearch-0.90.13/elasticsearch-base/build b/elasticsearch-0.90.13/elasticsearch-base/build new file mode 100755 index 0000000..f691a2b --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-base/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}elasticsearch-base:0.90.13 . diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml b/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml new file mode 100755 index 0000000..c4b47c8 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml @@ -0,0 +1,377 @@ +##################### ElasticSearch Configuration Example ##################### + +# This file contains an overview of various configuration settings, +# targeted at operations staff. Application developers should +# consult the guide at . 
+# +# The installation procedure is covered at +# . +# +# ElasticSearch comes with reasonable defaults for most settings, +# so you can try it out without bothering with configuration. +# +# Most of the time, these defaults are just fine for running a production +# cluster. If you're fine-tuning your cluster, or wondering about the +# effect of certain configuration option, please _do ask_ on the +# mailing list or IRC channel [http://elasticsearch.org/community]. + +# Any element in the configuration can be replaced with environment variables +# by placing them in ${...} notation. For example: +# +# node.rack: ${RACK_ENV_VAR} + +# For information on supported formats and syntax for the config file, see +# + + +################################### Cluster ################################### + +# Cluster name identifies your cluster for auto-discovery. If you're running +# multiple clusters on the same network, make sure you're using unique names. +# +cluster.name: elasticsearch + + +#################################### Node ##################################### + +# Node names are generated dynamically on startup, so you're relieved +# from configuring them manually. You can tie this node to a specific name: +# +# node.name: "Franz Kafka" + +# Every node can be configured to allow or deny being eligible as the master, +# and to allow or deny to store the data. +# +# Allow this node to be eligible as a master node (enabled by default): +# +# node.master: true +# +# Allow this node to store data (enabled by default): +# +# node.data: true + +# You can exploit these settings to design advanced cluster topologies. +# +# 1. You want this node to never become a master node, only to hold data. +# This will be the "workhorse" of your cluster. +# +# node.master: false +# node.data: true +# +# 2. You want this node to only serve as a master: to not store any data and +# to have free resources. This will be the "coordinator" of your cluster. 
+# +# node.master: true +# node.data: false +# +# 3. You want this node to be neither master nor data node, but +# to act as a "search load balancer" (fetching data from nodes, +# aggregating results, etc.) +# +# node.master: false +# node.data: false + +# Use the Cluster Health API [http://localhost:9200/_cluster/health], the +# Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools +# such as , +# , +# and +# to inspect the cluster state. + +# A node can have generic attributes associated with it, which can later be used +# for customized shard allocation filtering, or allocation awareness. An attribute +# is a simple key value pair, similar to node.key: value, here is an example: +# +# node.rack: rack314 + +# By default, multiple nodes are allowed to start from the same installation location +# to disable it, set the following: +# node.max_local_storage_nodes: 1 + + +#################################### Index #################################### + +# You can set a number of options (such as shard/replica options, mapping +# or analyzer definitions, translog settings, ...) for indices globally, +# in this file. +# +# Note, that it makes more sense to configure index settings specifically for +# a certain index, either when creating it or by using the index templates API. +# +# See and +# +# for more information. + +# Set the number of shards (splits) of an index (5 by default): +# +# index.number_of_shards: 5 + +# Set the number of replicas (additional copies) of an index (1 by default): +# +# index.number_of_replicas: 1 + +# Note, that for development on a local machine, with small indices, it usually +# makes sense to "disable" the distributed features: +# +# index.number_of_shards: 1 +# index.number_of_replicas: 0 + +# These settings directly affect the performance of index and search operations +# in your cluster. Assuming you have enough machines to hold shards and +# replicas, the rule of thumb is: +# +# 1. 
Having more *shards* enhances the _indexing_ performance and allows to +# _distribute_ a big index across machines. +# 2. Having more *replicas* enhances the _search_ performance and improves the +# cluster _availability_. +# +# The "number_of_shards" is a one-time setting for an index. +# +# The "number_of_replicas" can be increased or decreased anytime, +# by using the Index Update Settings API. +# +# ElasticSearch takes care about load balancing, relocating, gathering the +# results from nodes, etc. Experiment with different settings to fine-tune +# your setup. + +# Use the Index Status API () to inspect +# the index status. + + +#################################### Paths #################################### + +# Path to directory containing configuration (this file and logging.yml): +# +# path.conf: /path/to/conf + +# Path to directory where to store index data allocated for this node. +# +# path.data: /path/to/data +# +# Can optionally include more than one location, causing data to be striped across +# the locations (a la RAID 0) on a file level, favouring locations with most free +# space on creation. For example: +# +# path.data: /path/to/data1,/path/to/data2 + +# Path to temporary files: +# +# path.work: /path/to/work + +# Path to log files: +# +# path.logs: /path/to/logs + +# Path to where plugins are installed: +# +# path.plugins: /path/to/plugins + + +#################################### Plugin ################################### + +# If a plugin listed here is not installed for current node, the node will not start. +# +# plugin.mandatory: mapper-attachments,lang-groovy + + +################################### Memory #################################### + +# ElasticSearch performs poorly when JVM starts swapping: you should ensure that +# it _never_ swaps. 
+# +# Set this property to true to lock the memory: +# +# bootstrap.mlockall: true + +# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set +# to the same value, and that the machine has enough memory to allocate +# for ElasticSearch, leaving enough memory for the operating system itself. +# +# You should also make sure that the ElasticSearch process is allowed to lock +# the memory, eg. by using `ulimit -l unlimited`. + + +############################## Network And HTTP ############################### + +# ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens +# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node +# communication. (the range means that if the port is busy, it will automatically +# try the next port). + +# Set the bind address specifically (IPv4 or IPv6): +# +# network.bind_host: 192.168.0.1 + +# Set the address other nodes will use to communicate with this node. If not +# set, it is automatically derived. It must point to an actual IP address. +# +# network.publish_host: 192.168.0.1 + +# Set both 'bind_host' and 'publish_host': +# +network.host: @IP@ + +# Set a custom port for the node to node communication (9300 by default): +# +# transport.tcp.port: 9300 + +# Enable compression for all communication between nodes (disabled by default): +# +# transport.tcp.compress: true + +# Set a custom port to listen for HTTP traffic: +# +# http.port: 9200 + +# Set a custom allowed content length: +# +# http.max_content_length: 100mb + +# Disable HTTP completely: +# +# http.enabled: false + + +################################### Gateway ################################### + +# The gateway allows for persisting the cluster state between full cluster +# restarts. Every change to the state (such as adding an index) will be stored +# in the gateway, and when the cluster starts up for the first time, +# it will read its state from the gateway. 
+ +# There are several types of gateway implementations. For more information, see +# . + +# The default gateway type is the "local" gateway (recommended): +# +# gateway.type: local + +# Settings below control how and when to start the initial recovery process on +# a full cluster restart (to reuse as much local data as possible when using shared +# gateway). + +# Allow recovery process after N nodes in a cluster are up: +# +# gateway.recover_after_nodes: 1 + +# Set the timeout to initiate the recovery process, once the N nodes +# from previous setting are up (accepts time value): +# +# gateway.recover_after_time: 5m + +# Set how many nodes are expected in this cluster. Once these N nodes +# are up (and recover_after_nodes is met), begin recovery process immediately +# (without waiting for recover_after_time to expire): +# +# gateway.expected_nodes: 2 + + +############################# Recovery Throttling ############################# + +# These settings allow to control the process of shards allocation between +# nodes during initial recovery, replica allocation, rebalancing, +# or when adding and removing nodes. + +# Set the number of concurrent recoveries happening on a node: +# +# 1. During the initial recovery +# +# cluster.routing.allocation.node_initial_primaries_recoveries: 4 +# +# 2. During adding/removing nodes, rebalancing, etc +# +# cluster.routing.allocation.node_concurrent_recoveries: 2 + +# Set to throttle throughput when recovering (eg. 100mb, by default 20mb): +# +# indices.recovery.max_bytes_per_sec: 20mb + +# Set to limit the number of open concurrent streams when +# recovering a shard from a peer: +# +# indices.recovery.concurrent_streams: 5 + + +################################## Discovery ################################## + +# Discovery infrastructure ensures nodes can be found within a cluster +# and master node is elected. Multicast discovery is the default. 
+ +# Set to ensure a node sees N other master eligible nodes to be considered +# operational within the cluster. Its recommended to set it to a higher value +# than 1 when running more than 2 nodes in the cluster. +# +# discovery.zen.minimum_master_nodes: 1 + +# Set the time to wait for ping responses from other nodes when discovering. +# Set this option to a higher value on a slow or congested network +# to minimize discovery failures: +# +# discovery.zen.ping.timeout: 3s + +# For more information, see +# + +# Unicast discovery allows to explicitly control which nodes will be used +# to discover the cluster. It can be used when multicast is not present, +# or to restrict the cluster communication-wise. +# +# 1. Disable multicast discovery (enabled by default): +# +# discovery.zen.ping.multicast.enabled: false +# +# 2. Configure an initial list of master nodes in the cluster +# to perform discovery when new nodes (master or data) are started: +# +# discovery.zen.ping.unicast.hosts: ["host1", "host2:port"] + +# EC2 discovery allows to use AWS EC2 API in order to perform discovery. +# +# You have to install the cloud-aws plugin for enabling the EC2 discovery. +# +# For more information, see +# +# +# See +# for a step-by-step tutorial. + +# GCE discovery allows to use Google Compute Engine API in order to perform discovery. +# +# You have to install the cloud-gce plugin for enabling the GCE discovery. +# +# For more information, see . + +# Azure discovery allows to use Azure API in order to perform discovery. +# +# You have to install the cloud-azure plugin for enabling the Azure discovery. +# +# For more information, see . + +################################## Slow Log ################################## + +# Shard level query and fetch threshold logging. 
+ +#index.search.slowlog.threshold.query.warn: 10s +#index.search.slowlog.threshold.query.info: 5s +#index.search.slowlog.threshold.query.debug: 2s +#index.search.slowlog.threshold.query.trace: 500ms + +#index.search.slowlog.threshold.fetch.warn: 1s +#index.search.slowlog.threshold.fetch.info: 800ms +#index.search.slowlog.threshold.fetch.debug: 500ms +#index.search.slowlog.threshold.fetch.trace: 200ms + +#index.indexing.slowlog.threshold.index.warn: 10s +#index.indexing.slowlog.threshold.index.info: 5s +#index.indexing.slowlog.threshold.index.debug: 2s +#index.indexing.slowlog.threshold.index.trace: 500ms + +################################## GC Logging ################################ + +#monitor.jvm.gc.young.warn: 1000ms +#monitor.jvm.gc.young.info: 700ms +#monitor.jvm.gc.young.debug: 400ms + +#monitor.jvm.gc.old.warn: 10s +#monitor.jvm.gc.old.info: 5s +#monitor.jvm.gc.old.debug: 2s diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/logging.yml b/elasticsearch-0.90.13/elasticsearch-base/files/logging.yml new file mode 100755 index 0000000..9e00d01 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-base/files/logging.yml @@ -0,0 +1,56 @@ +# you can override this using by setting a system property, for example -Des.logger.level=DEBUG +es.logger.level: INFO +rootLogger: ${es.logger.level}, console, file +logger: + # log action execution errors for easier debugging + action: DEBUG + # reduce the logging for aws, too much is logged under the default INFO + com.amazonaws: WARN + + # gateway + #gateway: DEBUG + #index.gateway: DEBUG + + # peer shard recovery + #indices.recovery: DEBUG + + # discovery + #discovery: TRACE + + index.search.slowlog: TRACE, index_search_slow_log_file + index.indexing.slowlog: TRACE, index_indexing_slow_log_file + +additivity: + index.search.slowlog: false + index.indexing.slowlog: false + +appender: + console: + type: console + layout: + type: consolePattern + conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + + file: + 
type: dailyRollingFile + file: ${path.logs}/${cluster.name}.log + datePattern: "'.'yyyy-MM-dd" + layout: + type: pattern + conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + + index_search_slow_log_file: + type: dailyRollingFile + file: ${path.logs}/${cluster.name}_index_search_slowlog.log + datePattern: "'.'yyyy-MM-dd" + layout: + type: pattern + conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" + + index_indexing_slow_log_file: + type: dailyRollingFile + file: ${path.logs}/${cluster.name}_index_indexing_slowlog.log + datePattern: "'.'yyyy-MM-dd" + layout: + type: pattern + conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n" From 911f2658d7b541184ac7986ecb78c051d233f871 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 6 May 2014 16:20:48 -0400 Subject: [PATCH 32/97] Changed Elasticsearch start routine --- elasticsearch-0.90.13/elasticsearch-base/Dockerfile | 3 ++- elasticsearch-0.90.13/elasticsearch-base/files/default_cmd | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100755 elasticsearch-0.90.13/elasticsearch-base/files/default_cmd diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index 5c0398b..a883885 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -40,4 +40,5 @@ RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) RUN echo "ES_IP=$IP" RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml -RUN $ES_HOME/bin/elasticsearch -f +RUN chmod 700 /root/elasticsearch_files/default_cmd +CMD ["/root/elasticsearch_files/default_cmd"] \ No newline at end of file diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd new file mode 100755 index 0000000..4739f52 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd @@ -0,0 +1,7 @@ +${ES_HOME}/bin/elasticsearch -d + 
+while [ 1 ]; +do + tail -f "${ES_HOME}/logs"/*.out + sleep 1 +done \ No newline at end of file From a672fe823e1dad4da186e16f4476b85890d02cca Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 7 May 2014 16:46:24 -0400 Subject: [PATCH 33/97] Separated elasticsearch containers into master and workers --- deploy/deploy_elasticsearch.sh | 4 ++++ deploy/start_elasticsearch_cluster.sh | 18 ++++++------------ .../elasticsearch-base/Dockerfile | 3 --- .../elasticsearch-base/files/elasticsearch.yml | 5 ++++- .../elasticsearch-master/Dockerfile | 17 +++++++++++++++++ .../elasticsearch-master/build | 4 ++++ .../elasticsearch-worker/Dockerfile | 17 +++++++++++++++++ .../elasticsearch-worker/build | 4 ++++ 8 files changed, 56 insertions(+), 16 deletions(-) create mode 100755 elasticsearch-0.90.13/elasticsearch-master/Dockerfile create mode 100755 elasticsearch-0.90.13/elasticsearch-master/build create mode 100755 elasticsearch-0.90.13/elasticsearch-worker/Dockerfile create mode 100755 elasticsearch-0.90.13/elasticsearch-worker/build diff --git a/deploy/deploy_elasticsearch.sh b/deploy/deploy_elasticsearch.sh index bbfc379..4e58b52 100755 --- a/deploy/deploy_elasticsearch.sh +++ b/deploy/deploy_elasticsearch.sh @@ -92,6 +92,10 @@ fi start_nameserver $NAMESERVER_IMAGE wait_for_nameserver +start_master ${image_name}-master $image_version +wait_for_master + start_workers ${image_name}-worker $image_version sleep 3 echo "" +print_cluster_info \ No newline at end of file diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 9ed8dc7..6e5f41d 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -6,8 +6,7 @@ NUM_REGISTERED_WORKERS=0 BASEDIR=$(cd $(dirname $0); pwd) ELASTICSERVERS="${BASEDIR}/elasticservers" -# starts the Spark/Shark master container -: <<'END' +# starts the elasticsearch master container function start_master() { echo "starting master container" if [ "$DEBUG" -gt 0 ]; then @@ -26,9 
+25,8 @@ function start_master() { echo "MASTER_IP: $MASTER_IP" echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE } -END -# starts a number of Spark/Shark workers +# starts a number of elasticsearch workers function start_workers() { rm -f $ELASTICSERVERS @@ -57,24 +55,22 @@ function start_workers() { } # prints out information on the cluster -: <<'END' function print_cluster_info() { BASEDIR=$(cd $(dirname $0); pwd)"/.." echo "" echo "***********************************************************************" echo "" - echo "visit Hadoop Namenode at: http://$MASTER_IP:50070" - echo "ssh into master via: ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}" - echo "" echo "/data mapped: $VOLUME_MAP" echo "" - echo "kill master via: sudo docker kill $MASTER" + echo "MASTER_IP: ${MASTER_IP}" + echo "" + echo "WORKERS:" + cat -n $ELASTICSERVERS echo "***********************************************************************" echo "" echo "to enable cluster name resolution add the following line to _the top_ of your host's /etc/resolv.conf:" echo "nameserver $NAMESERVER_IP" } -END : <<'END' function get_num_registered_workers() { @@ -83,7 +79,6 @@ function get_num_registered_workers() { } END -: <<'END' function wait_for_master { echo -n "waiting for master " sleep 1 @@ -98,6 +93,5 @@ function wait_for_master { echo "" sleep 2 } -END diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index a883885..4d67413 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -39,6 +39,3 @@ ADD files/elasticsearch.yml $ES_HOME/conf/elasticsearch.yml RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') RUN echo "ES_IP=$IP" RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml - -RUN chmod 700 /root/elasticsearch_files/default_cmd 
-CMD ["/root/elasticsearch_files/default_cmd"] \ No newline at end of file diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml b/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml index c4b47c8..e69bed4 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml +++ b/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml @@ -50,6 +50,9 @@ cluster.name: elasticsearch # # node.data: true +node.master: @MASTER@ +node.data: @DATA@ + # You can exploit these settings to design advanced cluster topologies. # # 1. You want this node to never become a master node, only to hold data. @@ -181,7 +184,7 @@ cluster.name: elasticsearch # # Set this property to true to lock the memory: # -# bootstrap.mlockall: true +bootstrap.mlockall: true # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set # to the same value, and that the machine has enough memory to allocate diff --git a/elasticsearch-0.90.13/elasticsearch-master/Dockerfile b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile new file mode 100755 index 0000000..93cb348 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile @@ -0,0 +1,17 @@ +# +# ElasticSearch Dockerfile +# +# https://github.com/dockerfile/elasticsearch +# + +# Pull base image. +FROM htaox/elasticsearch-base:0.90.13 +MAINTAINER htaox htaox@hotmail.com + +RUN sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml +RUN sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml + +ENV ES_HEAP_SIZE 1024 + +RUN chmod 700 /root/elasticsearch_files/default_cmd +CMD ["/root/elasticsearch_files/default_cmd"] \ No newline at end of file diff --git a/elasticsearch-0.90.13/elasticsearch-master/build b/elasticsearch-0.90.13/elasticsearch-master/build new file mode 100755 index 0000000..d37a0b1 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-master/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . 
-type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}elasticsearch-master:0.90.13 . diff --git a/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile new file mode 100755 index 0000000..1cd6c30 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile @@ -0,0 +1,17 @@ +# +# ElasticSearch Dockerfile +# +# https://github.com/dockerfile/elasticsearch +# + +# Pull base image. +FROM htaox/elasticsearch-base:0.90.13 +MAINTAINER htaox htaox@hotmail.com + +RUN sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml +RUN sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml + +ENV ES_HEAP_SIZE 2048 + +RUN chmod 700 /root/elasticsearch_files/default_cmd +CMD ["/root/elasticsearch_files/default_cmd"] \ No newline at end of file diff --git a/elasticsearch-0.90.13/elasticsearch-worker/build b/elasticsearch-0.90.13/elasticsearch-worker/build new file mode 100755 index 0000000..116155b --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-worker/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}elasticsearch-worker:0.90.13 . 
From 0e2cc6ccaa2900800b50b544c619b8b5749d9564 Mon Sep 17 00:00:00 2001 From: htaox Date: Thu, 8 May 2014 09:44:25 -0400 Subject: [PATCH 34/97] Remove unnecessary CMD in master and worker Dockerfile --- elasticsearch-0.90.13/elasticsearch-base/files/default_cmd | 7 ------- elasticsearch-0.90.13/elasticsearch-master/Dockerfile | 5 +---- elasticsearch-0.90.13/elasticsearch-worker/Dockerfile | 5 +---- 3 files changed, 2 insertions(+), 15 deletions(-) delete mode 100755 elasticsearch-0.90.13/elasticsearch-base/files/default_cmd diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd deleted file mode 100755 index 4739f52..0000000 --- a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd +++ /dev/null @@ -1,7 +0,0 @@ -${ES_HOME}/bin/elasticsearch -d - -while [ 1 ]; -do - tail -f "${ES_HOME}/logs"/*.out - sleep 1 -done \ No newline at end of file diff --git a/elasticsearch-0.90.13/elasticsearch-master/Dockerfile b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile index 93cb348..361a954 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile @@ -11,7 +11,4 @@ MAINTAINER htaox htaox@hotmail.com RUN sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml RUN sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml -ENV ES_HEAP_SIZE 1024 - -RUN chmod 700 /root/elasticsearch_files/default_cmd -CMD ["/root/elasticsearch_files/default_cmd"] \ No newline at end of file +ENV ES_HEAP_SIZE 1g diff --git a/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile index 1cd6c30..177fe8c 100755 --- a/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile @@ -11,7 +11,4 @@ MAINTAINER htaox htaox@hotmail.com RUN sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml RUN sed -i "s/@DATA@/true/g" 
$ES_HOME/conf/elasticsearch.yml -ENV ES_HEAP_SIZE 2048 - -RUN chmod 700 /root/elasticsearch_files/default_cmd -CMD ["/root/elasticsearch_files/default_cmd"] \ No newline at end of file +ENV ES_HEAP_SIZE 2g From 74447218cd00c049eeaa0866bba23ff191dff277 Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 12:11:57 -0400 Subject: [PATCH 35/97] Added init.d routine --- .../elasticsearch-base/Dockerfile | 18 ++- .../elasticsearch-base/files/default_cmd | 150 ++++++++++++++++++ .../elasticsearch-master/Dockerfile | 6 +- .../elasticsearch-worker/Dockerfile | 6 +- 4 files changed, 172 insertions(+), 8 deletions(-) create mode 100755 elasticsearch-0.90.13/elasticsearch-base/files/default_cmd diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index 4d67413..cdd39aa 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -13,10 +13,16 @@ RUN cd /tmp && tar xvzf elasticsearch-0.90.13.tar.gz && rm -f elasticsearch-0.90 RUN mv /tmp/elasticsearch-0.90.13 /elasticsearch ENV ES_HOME /elasticsearch - # Install elasticsearch-head RUN /elasticsearch/bin/plugin -install mobz/elasticsearch-head +# Create elasticsearch user +ENV ES_USER elasticsearch +ENV ES_GROUP elasticsearch + +RUN addgroup elasticsearch +RUN adduser --ingroup elasticsearch elasticsearch + # Define mountable directories. VOLUME ["/data"] @@ -24,7 +30,7 @@ VOLUME ["/data"] WORKDIR /data # Define default command. -ENTRYPOINT ["/elasticsearch/bin/elasticsearch"] +# ENTRYPOINT ["/elasticsearch/bin/elasticsearch"] # Expose ports. 
# - 9200: HTTP @@ -33,9 +39,9 @@ EXPOSE 9200 EXPOSE 9300 ADD files /root/elasticsearch_files - +ADD files/default_cmd $ES_HOME/bin/default_cmd ADD files/elasticsearch.yml $ES_HOME/conf/elasticsearch.yml -RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -RUN echo "ES_IP=$IP" -RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml +RUN chmod 700 files/default_cmd $ES_HOME/bin + +RUN chown elasticsearch.elasticsearch $ES_HOME diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd new file mode 100755 index 0000000..3dd9733 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd @@ -0,0 +1,150 @@ +#!/bin/sh +# +# /etc/init.d/elasticsearch -- startup script for Elasticsearch +# +# Written by Miquel van Smoorenburg . +# Modified for Debian GNU/Linux by Ian Murdock . +# Modified for Tomcat by Stefan Gybas . +# Modified for Tomcat6 by Thierry Carrez . +# Additional improvements by Jason Brittain . +# Modified by Nicolas Huray for ElasticSearch . +# +### BEGIN INIT INFO +# Provides: elasticsearch +# Required-Start: $network $remote_fs $named +# Required-Stop: $network $remote_fs $named +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Starts elasticsearch +# Description: Starts elasticsearch using start-stop-daemon +### END INIT INFO + +PATH=/bin:/usr/bin:/sbin:/usr/sbin +NAME=elasticsearch +DESC="ElasticSearch Server" +DEFAULT=/etc/default/$NAME + +if [ `id -u` -ne 0 ]; then + echo "You need root privileges to run this script" + exit 1 +fi + + +. /lib/lsb/init-functions + +if [ -r /etc/default/rcS ]; then + . 
/etc/default/rcS +fi + + +# The following variables can be overwritten in $DEFAULT + +# Run ElasticSearch as this user ID and group ID +ES_USER=elasticsearch +ES_GROUP=elasticsearch + +# The first existing directory is used for JAVA_HOME (if JAVA_HOME is not defined in $DEFAULT) +JDK_DIRS="/usr/lib/jvm/java-7-oracle /usr/lib/jvm/java-7-openjdk /usr/lib/jvm/java-7-openjdk-amd64/ /usr/lib/jvm/java-7-openjdk-armhf /usr/lib/jvm/java-7-openjdk-i386/ /usr/lib/jvm/java-6-sun /usr/lib/jvm/java-6-openjdk /usr/lib/jvm/java-6-openjdk-amd64 /usr/lib/jvm/java-6-openjdk-armhf /usr/lib/jvm/java-6-openjdk-i386 /usr/lib/jvm/default-java" + +# Look for the right JVM to use +for jdir in $JDK_DIRS; do + if [ -r "$jdir/bin/java" -a -z "${JAVA_HOME}" ]; then + JAVA_HOME="$jdir" + fi +done +export JAVA_HOME + +# Directory where the ElasticSearch binary distribution resides +ES_HOME=/usr/share/$NAME + +# Heap Size (defaults to 256m min, 1g max) +ES_HEAP_SIZE=2g + +# Heap new generation +#ES_HEAP_NEWSIZE= + +# max direct memory +#ES_DIRECT_SIZE= + +# Additional Java OPTS +#ES_JAVA_OPTS= + +# ElasticSearch log directory +# LOG_DIR=/var/log/$NAME +LOG_DIR=$ES_HOME/log + +# ElasticSearch data directory +# DATA_DIR=/var/lib/$NAME +DATA_DIR=$ES_HOME/lib + +# ElasticSearch work directory +# WORK_DIR=/tmp/$NAME +WORK_DIR=$ES_HOME/tmp + +# ElasticSearch configuration directory +# CONF_DIR=/etc/$NAME +CONF_DIR=$ES_HOME/conf + +# ElasticSearch configuration file (elasticsearch.yml) +CONF_FILE=$CONF_DIR/elasticsearch.yml + +# Maximum number of VMA (Virtual Memory Areas) a process can own +MAX_MAP_COUNT=65535 + +# End of variables that can be overwritten in $DEFAULT + +# overwrite settings from default file +if [ -f "$DEFAULT" ]; then + . 
"$DEFAULT" +fi + +# Define other required variables +# PID_FILE=/var/run/$NAME.pid +PID_FILE=$ES_HOME/run/$NAME.pid + +DAEMON=$ES_HOME/bin/elasticsearch +DAEMON_OPTS="-p $PID_FILE -Des.default.config=$CONF_FILE -Des.default.path.home=$ES_HOME -Des.default.path.logs=$LOG_DIR -Des.default.path.data=$DATA_DIR -Des.default.path.work=$WORK_DIR -Des.default.path.conf=$CONF_DIR" + +export ES_HEAP_SIZE +export ES_HEAP_NEWSIZE +export ES_DIRECT_SIZE +export ES_JAVA_OPTS + +# Check DAEMON exists +test -x $DAEMON || exit 0 + +checkJava() { + if [ -x "$JAVA_HOME/bin/java" ]; then + JAVA="$JAVA_HOME/bin/java" + else + JAVA=`which java` + fi + + if [ ! -x "$JAVA" ]; then + echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" + exit 1 + fi +} + +checkJava + +log_daemon_msg "Starting $DESC" + +pid=`pidofproc -p $PID_FILE elasticsearch` +if [ -n "$pid" ] ; then + log_begin_msg "Already running." + log_end_msg 0 + exit 0 +fi + +# Prepare environment +mkdir -p "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" && chown "$ES_USER":"$ES_GROUP" "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" +mkdir $ES_HOME/run +touch "$PID_FILE" && chown "$ES_USER":"$ES_GROUP" "$PID_FILE" + +if [ -n "$MAX_MAP_COUNT" ]; then + sysctl -q -w vm.max_map_count=$MAX_MAP_COUNT +fi + +# Start Daemon +exec sudo -u $ES_USER $DAEMON $DAEMON_OPTS \ No newline at end of file diff --git a/elasticsearch-0.90.13/elasticsearch-master/Dockerfile b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile index 361a954..4b9b3f3 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile @@ -8,7 +8,11 @@ FROM htaox/elasticsearch-base:0.90.13 MAINTAINER htaox htaox@hotmail.com +RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +RUN echo "ES_IP=$IP" +RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml + RUN sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml RUN sed -i 
"s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml -ENV ES_HEAP_SIZE 1g +CMD ["/elasticsearch/bin/default_cmd"] \ No newline at end of file diff --git a/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile index 177fe8c..dc37205 100755 --- a/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile @@ -8,7 +8,11 @@ FROM htaox/elasticsearch-base:0.90.13 MAINTAINER htaox htaox@hotmail.com +RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +RUN echo "ES_IP=$IP" +RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml + RUN sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml RUN sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml -ENV ES_HEAP_SIZE 2g +CMD ["/elasticsearch/bin/default_cmd"] From 876c954684109cb33551b153ccb3949d122411e3 Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 12:28:44 -0400 Subject: [PATCH 36/97] Use useradd instead of adduser --- elasticsearch-0.90.13/elasticsearch-base/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index cdd39aa..4a0499d 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -21,7 +21,8 @@ ENV ES_USER elasticsearch ENV ES_GROUP elasticsearch RUN addgroup elasticsearch -RUN adduser --ingroup elasticsearch elasticsearch +RUN useradd elasticsearch -g elasticsearch +# RUN adduser --ingroup elasticsearch elasticsearch # Define mountable directories. 
VOLUME ["/data"] @@ -42,6 +43,6 @@ ADD files /root/elasticsearch_files ADD files/default_cmd $ES_HOME/bin/default_cmd ADD files/elasticsearch.yml $ES_HOME/conf/elasticsearch.yml -RUN chmod 700 files/default_cmd $ES_HOME/bin +RUN chmod 700 $ES_HOME/bin/default_cmd RUN chown elasticsearch.elasticsearch $ES_HOME From 945d3caff3410a4bf77b29c0df4b945271a777f6 Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 14:09:14 -0400 Subject: [PATCH 37/97] Added execuatble scripts for master & worker --- .../elasticsearch-master/Dockerfile | 11 +++-------- .../files/run_elasticsearch_master.sh | 15 +++++++++++++++ .../elasticsearch-worker/Dockerfile | 11 +++-------- .../files/run_elasticsearch_worker.sh | 15 +++++++++++++++ 4 files changed, 36 insertions(+), 16 deletions(-) create mode 100755 elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh create mode 100755 elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh diff --git a/elasticsearch-0.90.13/elasticsearch-master/Dockerfile b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile index 4b9b3f3..b7a0bbb 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-master/Dockerfile @@ -8,11 +8,6 @@ FROM htaox/elasticsearch-base:0.90.13 MAINTAINER htaox htaox@hotmail.com -RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -RUN echo "ES_IP=$IP" -RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml - -RUN sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml -RUN sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml - -CMD ["/elasticsearch/bin/default_cmd"] \ No newline at end of file +ADD files /root/elasticsearch_master_files +RUN chmod 700 /root/elasticsearch_master_files/run_elasticsearch_master.sh +CMD ["/root/elasticsearch_master_files/run_elasticsearch_master.sh"] \ No newline at end of file diff --git 
a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh new file mode 100755 index 0000000..9e5741f --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +env + +echo 'Starting Elasticsearch Master' + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "ES_MASTER_IP=$IP" + +sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml + +sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml +sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml + +$ES_HOME/bin/default_cmd diff --git a/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile index dc37205..be95d2e 100755 --- a/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-worker/Dockerfile @@ -8,11 +8,6 @@ FROM htaox/elasticsearch-base:0.90.13 MAINTAINER htaox htaox@hotmail.com -RUN IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -RUN echo "ES_IP=$IP" -RUN sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml - -RUN sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml -RUN sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml - -CMD ["/elasticsearch/bin/default_cmd"] +ADD files /root/elasticsearch_worker_files +RUN chmod 700 /root/elasticsearch_worker_files/run_elasticsearch_worker.sh +CMD ["/root/elasticsearch_worker_files/run_elasticsearch_worker.sh"] diff --git a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh new file mode 100755 index 0000000..4bbcba7 --- /dev/null +++ b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +env + +echo 'Starting Elasticsearch Worker' + 
+IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "ES_WORKER_IP=$IP" + +sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml + +sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml +sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml + +$ES_HOME/bin/default_cmd \ No newline at end of file From a82a11e3c7fdfb87ce3482a1ab022398a11e35b2 Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 15:48:03 -0400 Subject: [PATCH 38/97] Removed carriage return from default_cmd --- .../elasticsearch-base/files/default_cmd | 298 +++++++++--------- 1 file changed, 149 insertions(+), 149 deletions(-) diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd index 3dd9733..ce0ffaa 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd +++ b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd @@ -1,150 +1,150 @@ -#!/bin/sh -# -# /etc/init.d/elasticsearch -- startup script for Elasticsearch -# -# Written by Miquel van Smoorenburg . -# Modified for Debian GNU/Linux by Ian Murdock . -# Modified for Tomcat by Stefan Gybas . -# Modified for Tomcat6 by Thierry Carrez . -# Additional improvements by Jason Brittain . -# Modified by Nicolas Huray for ElasticSearch . -# -### BEGIN INIT INFO -# Provides: elasticsearch -# Required-Start: $network $remote_fs $named -# Required-Stop: $network $remote_fs $named -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Starts elasticsearch -# Description: Starts elasticsearch using start-stop-daemon -### END INIT INFO - -PATH=/bin:/usr/bin:/sbin:/usr/sbin -NAME=elasticsearch -DESC="ElasticSearch Server" -DEFAULT=/etc/default/$NAME - -if [ `id -u` -ne 0 ]; then - echo "You need root privileges to run this script" - exit 1 -fi - - -. /lib/lsb/init-functions - -if [ -r /etc/default/rcS ]; then - . 
/etc/default/rcS -fi - - -# The following variables can be overwritten in $DEFAULT - -# Run ElasticSearch as this user ID and group ID -ES_USER=elasticsearch -ES_GROUP=elasticsearch - -# The first existing directory is used for JAVA_HOME (if JAVA_HOME is not defined in $DEFAULT) -JDK_DIRS="/usr/lib/jvm/java-7-oracle /usr/lib/jvm/java-7-openjdk /usr/lib/jvm/java-7-openjdk-amd64/ /usr/lib/jvm/java-7-openjdk-armhf /usr/lib/jvm/java-7-openjdk-i386/ /usr/lib/jvm/java-6-sun /usr/lib/jvm/java-6-openjdk /usr/lib/jvm/java-6-openjdk-amd64 /usr/lib/jvm/java-6-openjdk-armhf /usr/lib/jvm/java-6-openjdk-i386 /usr/lib/jvm/default-java" - -# Look for the right JVM to use -for jdir in $JDK_DIRS; do - if [ -r "$jdir/bin/java" -a -z "${JAVA_HOME}" ]; then - JAVA_HOME="$jdir" - fi -done -export JAVA_HOME - -# Directory where the ElasticSearch binary distribution resides -ES_HOME=/usr/share/$NAME - -# Heap Size (defaults to 256m min, 1g max) -ES_HEAP_SIZE=2g - -# Heap new generation -#ES_HEAP_NEWSIZE= - -# max direct memory -#ES_DIRECT_SIZE= - -# Additional Java OPTS -#ES_JAVA_OPTS= - -# ElasticSearch log directory -# LOG_DIR=/var/log/$NAME -LOG_DIR=$ES_HOME/log - -# ElasticSearch data directory -# DATA_DIR=/var/lib/$NAME -DATA_DIR=$ES_HOME/lib - -# ElasticSearch work directory -# WORK_DIR=/tmp/$NAME -WORK_DIR=$ES_HOME/tmp - -# ElasticSearch configuration directory -# CONF_DIR=/etc/$NAME -CONF_DIR=$ES_HOME/conf - -# ElasticSearch configuration file (elasticsearch.yml) -CONF_FILE=$CONF_DIR/elasticsearch.yml - -# Maximum number of VMA (Virtual Memory Areas) a process can own -MAX_MAP_COUNT=65535 - -# End of variables that can be overwritten in $DEFAULT - -# overwrite settings from default file -if [ -f "$DEFAULT" ]; then - . 
"$DEFAULT" -fi - -# Define other required variables -# PID_FILE=/var/run/$NAME.pid -PID_FILE=$ES_HOME/run/$NAME.pid - -DAEMON=$ES_HOME/bin/elasticsearch -DAEMON_OPTS="-p $PID_FILE -Des.default.config=$CONF_FILE -Des.default.path.home=$ES_HOME -Des.default.path.logs=$LOG_DIR -Des.default.path.data=$DATA_DIR -Des.default.path.work=$WORK_DIR -Des.default.path.conf=$CONF_DIR" - -export ES_HEAP_SIZE -export ES_HEAP_NEWSIZE -export ES_DIRECT_SIZE -export ES_JAVA_OPTS - -# Check DAEMON exists -test -x $DAEMON || exit 0 - -checkJava() { - if [ -x "$JAVA_HOME/bin/java" ]; then - JAVA="$JAVA_HOME/bin/java" - else - JAVA=`which java` - fi - - if [ ! -x "$JAVA" ]; then - echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" - exit 1 - fi -} - -checkJava - -log_daemon_msg "Starting $DESC" - -pid=`pidofproc -p $PID_FILE elasticsearch` -if [ -n "$pid" ] ; then - log_begin_msg "Already running." - log_end_msg 0 - exit 0 -fi - -# Prepare environment -mkdir -p "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" && chown "$ES_USER":"$ES_GROUP" "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" -mkdir $ES_HOME/run -touch "$PID_FILE" && chown "$ES_USER":"$ES_GROUP" "$PID_FILE" - -if [ -n "$MAX_MAP_COUNT" ]; then - sysctl -q -w vm.max_map_count=$MAX_MAP_COUNT -fi - -# Start Daemon +#!/bin/sh +# +# /etc/init.d/elasticsearch -- startup script for Elasticsearch +# +# Written by Miquel van Smoorenburg . +# Modified for Debian GNU/Linux by Ian Murdock . +# Modified for Tomcat by Stefan Gybas . +# Modified for Tomcat6 by Thierry Carrez . +# Additional improvements by Jason Brittain . +# Modified by Nicolas Huray for ElasticSearch . 
+# +### BEGIN INIT INFO +# Provides: elasticsearch +# Required-Start: $network $remote_fs $named +# Required-Stop: $network $remote_fs $named +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Starts elasticsearch +# Description: Starts elasticsearch using start-stop-daemon +### END INIT INFO + +PATH=/bin:/usr/bin:/sbin:/usr/sbin +NAME=elasticsearch +DESC="ElasticSearch Server" +DEFAULT=/etc/default/$NAME + +if [ `id -u` -ne 0 ]; then + echo "You need root privileges to run this script" + exit 1 +fi + + +. /lib/lsb/init-functions + +if [ -r /etc/default/rcS ]; then + . /etc/default/rcS +fi + + +# The following variables can be overwritten in $DEFAULT + +# Run ElasticSearch as this user ID and group ID +ES_USER=elasticsearch +ES_GROUP=elasticsearch + +# The first existing directory is used for JAVA_HOME (if JAVA_HOME is not defined in $DEFAULT) +JDK_DIRS="/usr/lib/jvm/java-7-oracle /usr/lib/jvm/java-7-openjdk /usr/lib/jvm/java-7-openjdk-amd64/ /usr/lib/jvm/java-7-openjdk-armhf /usr/lib/jvm/java-7-openjdk-i386/ /usr/lib/jvm/java-6-sun /usr/lib/jvm/java-6-openjdk /usr/lib/jvm/java-6-openjdk-amd64 /usr/lib/jvm/java-6-openjdk-armhf /usr/lib/jvm/java-6-openjdk-i386 /usr/lib/jvm/default-java" + +# Look for the right JVM to use +for jdir in $JDK_DIRS; do + if [ -r "$jdir/bin/java" -a -z "${JAVA_HOME}" ]; then + JAVA_HOME="$jdir" + fi +done +export JAVA_HOME + +# Directory where the ElasticSearch binary distribution resides +ES_HOME=/usr/share/$NAME + +# Heap Size (defaults to 256m min, 1g max) +ES_HEAP_SIZE=2g + +# Heap new generation +#ES_HEAP_NEWSIZE= + +# max direct memory +#ES_DIRECT_SIZE= + +# Additional Java OPTS +#ES_JAVA_OPTS= + +# ElasticSearch log directory +# LOG_DIR=/var/log/$NAME +LOG_DIR=$ES_HOME/log + +# ElasticSearch data directory +# DATA_DIR=/var/lib/$NAME +DATA_DIR=$ES_HOME/lib + +# ElasticSearch work directory +# WORK_DIR=/tmp/$NAME +WORK_DIR=$ES_HOME/tmp + +# ElasticSearch configuration directory +# CONF_DIR=/etc/$NAME 
+CONF_DIR=$ES_HOME/conf + +# ElasticSearch configuration file (elasticsearch.yml) +CONF_FILE=$CONF_DIR/elasticsearch.yml + +# Maximum number of VMA (Virtual Memory Areas) a process can own +MAX_MAP_COUNT=65535 + +# End of variables that can be overwritten in $DEFAULT + +# overwrite settings from default file +if [ -f "$DEFAULT" ]; then + . "$DEFAULT" +fi + +# Define other required variables +# PID_FILE=/var/run/$NAME.pid +PID_FILE=$ES_HOME/run/$NAME.pid + +DAEMON=$ES_HOME/bin/elasticsearch +DAEMON_OPTS="-p $PID_FILE -Des.default.config=$CONF_FILE -Des.default.path.home=$ES_HOME -Des.default.path.logs=$LOG_DIR -Des.default.path.data=$DATA_DIR -Des.default.path.work=$WORK_DIR -Des.default.path.conf=$CONF_DIR" + +export ES_HEAP_SIZE +export ES_HEAP_NEWSIZE +export ES_DIRECT_SIZE +export ES_JAVA_OPTS + +# Check DAEMON exists +test -x $DAEMON || exit 0 + +checkJava() { + if [ -x "$JAVA_HOME/bin/java" ]; then + JAVA="$JAVA_HOME/bin/java" + else + JAVA=`which java` + fi + + if [ ! -x "$JAVA" ]; then + echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" + exit 1 + fi +} + +checkJava + +log_daemon_msg "Starting $DESC" + +pid=`pidofproc -p $PID_FILE elasticsearch` +if [ -n "$pid" ] ; then + log_begin_msg "Already running." 
+ log_end_msg 0 + exit 0 +fi + +# Prepare environment +mkdir -p "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" && chown "$ES_USER":"$ES_GROUP" "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" +mkdir $ES_HOME/run +touch "$PID_FILE" && chown "$ES_USER":"$ES_GROUP" "$PID_FILE" + +if [ -n "$MAX_MAP_COUNT" ]; then + sysctl -q -w vm.max_map_count=$MAX_MAP_COUNT +fi + +# Start Daemon exec sudo -u $ES_USER $DAEMON $DAEMON_OPTS \ No newline at end of file From 5af26dcc68b1977be05658563dbf01468b833d21 Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 16:23:53 -0400 Subject: [PATCH 39/97] Run elasticsearch in the foreground --- .../elasticsearch-base/files/default_cmd | 150 ------------------ 1 file changed, 150 deletions(-) delete mode 100755 elasticsearch-0.90.13/elasticsearch-base/files/default_cmd diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd b/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd deleted file mode 100755 index ce0ffaa..0000000 --- a/elasticsearch-0.90.13/elasticsearch-base/files/default_cmd +++ /dev/null @@ -1,150 +0,0 @@ -#!/bin/sh -# -# /etc/init.d/elasticsearch -- startup script for Elasticsearch -# -# Written by Miquel van Smoorenburg . -# Modified for Debian GNU/Linux by Ian Murdock . -# Modified for Tomcat by Stefan Gybas . -# Modified for Tomcat6 by Thierry Carrez . -# Additional improvements by Jason Brittain . -# Modified by Nicolas Huray for ElasticSearch . -# -### BEGIN INIT INFO -# Provides: elasticsearch -# Required-Start: $network $remote_fs $named -# Required-Stop: $network $remote_fs $named -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Starts elasticsearch -# Description: Starts elasticsearch using start-stop-daemon -### END INIT INFO - -PATH=/bin:/usr/bin:/sbin:/usr/sbin -NAME=elasticsearch -DESC="ElasticSearch Server" -DEFAULT=/etc/default/$NAME - -if [ `id -u` -ne 0 ]; then - echo "You need root privileges to run this script" - exit 1 -fi - - -. 
/lib/lsb/init-functions - -if [ -r /etc/default/rcS ]; then - . /etc/default/rcS -fi - - -# The following variables can be overwritten in $DEFAULT - -# Run ElasticSearch as this user ID and group ID -ES_USER=elasticsearch -ES_GROUP=elasticsearch - -# The first existing directory is used for JAVA_HOME (if JAVA_HOME is not defined in $DEFAULT) -JDK_DIRS="/usr/lib/jvm/java-7-oracle /usr/lib/jvm/java-7-openjdk /usr/lib/jvm/java-7-openjdk-amd64/ /usr/lib/jvm/java-7-openjdk-armhf /usr/lib/jvm/java-7-openjdk-i386/ /usr/lib/jvm/java-6-sun /usr/lib/jvm/java-6-openjdk /usr/lib/jvm/java-6-openjdk-amd64 /usr/lib/jvm/java-6-openjdk-armhf /usr/lib/jvm/java-6-openjdk-i386 /usr/lib/jvm/default-java" - -# Look for the right JVM to use -for jdir in $JDK_DIRS; do - if [ -r "$jdir/bin/java" -a -z "${JAVA_HOME}" ]; then - JAVA_HOME="$jdir" - fi -done -export JAVA_HOME - -# Directory where the ElasticSearch binary distribution resides -ES_HOME=/usr/share/$NAME - -# Heap Size (defaults to 256m min, 1g max) -ES_HEAP_SIZE=2g - -# Heap new generation -#ES_HEAP_NEWSIZE= - -# max direct memory -#ES_DIRECT_SIZE= - -# Additional Java OPTS -#ES_JAVA_OPTS= - -# ElasticSearch log directory -# LOG_DIR=/var/log/$NAME -LOG_DIR=$ES_HOME/log - -# ElasticSearch data directory -# DATA_DIR=/var/lib/$NAME -DATA_DIR=$ES_HOME/lib - -# ElasticSearch work directory -# WORK_DIR=/tmp/$NAME -WORK_DIR=$ES_HOME/tmp - -# ElasticSearch configuration directory -# CONF_DIR=/etc/$NAME -CONF_DIR=$ES_HOME/conf - -# ElasticSearch configuration file (elasticsearch.yml) -CONF_FILE=$CONF_DIR/elasticsearch.yml - -# Maximum number of VMA (Virtual Memory Areas) a process can own -MAX_MAP_COUNT=65535 - -# End of variables that can be overwritten in $DEFAULT - -# overwrite settings from default file -if [ -f "$DEFAULT" ]; then - . 
"$DEFAULT" -fi - -# Define other required variables -# PID_FILE=/var/run/$NAME.pid -PID_FILE=$ES_HOME/run/$NAME.pid - -DAEMON=$ES_HOME/bin/elasticsearch -DAEMON_OPTS="-p $PID_FILE -Des.default.config=$CONF_FILE -Des.default.path.home=$ES_HOME -Des.default.path.logs=$LOG_DIR -Des.default.path.data=$DATA_DIR -Des.default.path.work=$WORK_DIR -Des.default.path.conf=$CONF_DIR" - -export ES_HEAP_SIZE -export ES_HEAP_NEWSIZE -export ES_DIRECT_SIZE -export ES_JAVA_OPTS - -# Check DAEMON exists -test -x $DAEMON || exit 0 - -checkJava() { - if [ -x "$JAVA_HOME/bin/java" ]; then - JAVA="$JAVA_HOME/bin/java" - else - JAVA=`which java` - fi - - if [ ! -x "$JAVA" ]; then - echo "Could not find any executable java binary. Please install java in your PATH or set JAVA_HOME" - exit 1 - fi -} - -checkJava - -log_daemon_msg "Starting $DESC" - -pid=`pidofproc -p $PID_FILE elasticsearch` -if [ -n "$pid" ] ; then - log_begin_msg "Already running." - log_end_msg 0 - exit 0 -fi - -# Prepare environment -mkdir -p "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" && chown "$ES_USER":"$ES_GROUP" "$LOG_DIR" "$DATA_DIR" "$WORK_DIR" -mkdir $ES_HOME/run -touch "$PID_FILE" && chown "$ES_USER":"$ES_GROUP" "$PID_FILE" - -if [ -n "$MAX_MAP_COUNT" ]; then - sysctl -q -w vm.max_map_count=$MAX_MAP_COUNT -fi - -# Start Daemon -exec sudo -u $ES_USER $DAEMON $DAEMON_OPTS \ No newline at end of file From a1bc35bb25201c733cae1b93e28e97036726be5d Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 16:32:20 -0400 Subject: [PATCH 40/97] Update master & worker run scripts --- elasticsearch-0.90.13/elasticsearch-base/Dockerfile | 5 +---- .../elasticsearch-master/files/run_elasticsearch_master.sh | 4 +++- .../elasticsearch-worker/files/run_elasticsearch_worker.sh | 4 +++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index 4a0499d..cfdc27e 100755 --- 
a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -40,9 +40,6 @@ EXPOSE 9200 EXPOSE 9300 ADD files /root/elasticsearch_files -ADD files/default_cmd $ES_HOME/bin/default_cmd ADD files/elasticsearch.yml $ES_HOME/conf/elasticsearch.yml -RUN chmod 700 $ES_HOME/bin/default_cmd - -RUN chown elasticsearch.elasticsearch $ES_HOME +RUN chown -R elasticsearch.elasticsearch $ES_HOME diff --git a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh index 9e5741f..9433557 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh +++ b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh @@ -12,4 +12,6 @@ sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml -$ES_HOME/bin/default_cmd +ENV ES_HEAP_SIZE 1g + +sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f diff --git a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh index 4bbcba7..39be825 100755 --- a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh +++ b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh @@ -12,4 +12,6 @@ sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml -$ES_HOME/bin/default_cmd \ No newline at end of file +ENV ES_HEAP_SIZE 2g + +sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f \ No newline at end of file From f2870f6f8803875bb52ec7980408939b737ee97d Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 16:43:48 -0400 Subject: [PATCH 41/97] Using unix line-endings for bash 
scripts --- .../files/run_elasticsearch_master.sh | 34 +++++++++---------- .../files/run_elasticsearch_worker.sh | 32 ++++++++--------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh index 9433557..7474466 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh +++ b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh @@ -1,17 +1,17 @@ -#!/bin/bash - -env - -echo 'Starting Elasticsearch Master' - -IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -echo "ES_MASTER_IP=$IP" - -sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml - -sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml -sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml - -ENV ES_HEAP_SIZE 1g - -sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f +#!/bin/bash + +env + +echo 'Starting Elasticsearch Master' + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "ES_MASTER_IP=$IP" + +sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml + +sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml +sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml + +ENV ES_HEAP_SIZE 1g + +sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f diff --git a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh index 39be825..23ac7e9 100755 --- a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh +++ b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh @@ -1,17 +1,17 @@ -#!/bin/bash - -env - -echo 'Starting Elasticsearch Worker' - -IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -echo 
"ES_WORKER_IP=$IP" - -sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml - -sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml -sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml - -ENV ES_HEAP_SIZE 2g - +#!/bin/bash + +env + +echo 'Starting Elasticsearch Worker' + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "ES_WORKER_IP=$IP" + +sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml + +sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml +sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml + +ENV ES_HEAP_SIZE 2g + sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f \ No newline at end of file From 38dae61db99e7978d5a04a969a1468105ed1c134 Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 17:15:16 -0400 Subject: [PATCH 42/97] Replace ES_MASTER_IP with MASTER_IP and ES_WORKER_IP with WORKER_IP --- .../elasticsearch-master/files/run_elasticsearch_master.sh | 2 +- .../elasticsearch-worker/files/run_elasticsearch_worker.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh index 7474466..de574d2 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh +++ b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh @@ -5,7 +5,7 @@ env echo 'Starting Elasticsearch Master' IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -echo "ES_MASTER_IP=$IP" +echo "MASTER_IP=$IP" sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml diff --git a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh index 23ac7e9..0e43212 100755 --- a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh +++ 
b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh @@ -5,7 +5,7 @@ env echo 'Starting Elasticsearch Worker' IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') -echo "ES_WORKER_IP=$IP" +echo "WORKER_IP=$IP" sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml From f97b7504357c4c63f4e7c0c610853ea8565cb553 Mon Sep 17 00:00:00 2001 From: "U-NYUMC\\taoh02" Date: Thu, 8 May 2014 17:28:03 -0400 Subject: [PATCH 43/97] There should be no arguments for docker run worker --- deploy/start_elasticsearch_cluster.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 6e5f41d..59110f6 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -35,9 +35,9 @@ function start_workers() { echo "starting worker container" hostname="worker${i}${DOMAINNAME}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP} + echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 fi - WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP}) + WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2) if [ "$WORKER" = "" ]; then echo "error: could not start worker container from image $1:$2" From 1a41308b9e66cf67b518f9abdef60f64fd514396 Mon Sep 17 00:00:00 2001 From: htaox Date: Fri, 9 May 2014 10:03:16 -0400 Subject: [PATCH 44/97] Added startup JAVA opts for master & worker --- deploy/kill_all.sh | 6 +++--- .../elasticsearch-master/files/run_elasticsearch_master.sh | 4 ++-- .../elasticsearch-worker/files/run_elasticsearch_worker.sh | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/deploy/kill_all.sh b/deploy/kill_all.sh index 5b1e62c..5644b7b 100755 --- a/deploy/kill_all.sh +++ b/deploy/kill_all.sh @@ -24,9 +24,9 @@ if [[ 
"$clustertype" == "nameserver" ]]; then nameserver=$(sudo docker ps | grep dnsmasq_files | awk '{print $1}' | tr '\n' ' ') kill_containers "$nameserver" else - master=$(sudo docker ps | grep ${clustertype}_master | awk '{print $1}' | tr '\n' ' ') - workers=$(sudo docker ps | grep ${clustertype}_worker | awk '{print $1}' | tr '\n' ' ') - shells=$(sudo docker ps | grep ${clustertype}_shell | awk '{print $1}' | tr '\n' ' ') + master=$(sudo docker ps | grep ${clustertype}.*master | awk '{print $1}' | tr '\n' ' ') + workers=$(sudo docker ps | grep ${clustertype}.*worker | awk '{print $1}' | tr '\n' ' ') + shells=$(sudo docker ps | grep ${clustertype}.*shell | awk '{print $1}' | tr '\n' ' ') kill_containers "$master" kill_containers "$workers" kill_containers "$shells" diff --git a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh index de574d2..6628da9 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh +++ b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh @@ -12,6 +12,6 @@ sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml -ENV ES_HEAP_SIZE 1g +ES_HEAP_SIZE=1g -sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f +sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f -Des.config=$ES_HOME/conf/elasticsearch.yml -Xms$ES_HEAP_SIZE -Xmx$ES_HEAP_SIZE diff --git a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh index 0e43212..9c1a574 100755 --- a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh +++ b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh @@ -12,6 +12,6 @@ sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml sed -i 
"s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml -ENV ES_HEAP_SIZE 2g +ES_HEAP_SIZE=2g -sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f \ No newline at end of file +sudo -u elasticsearch $ES_HOME/bin/elasticsearch -f -Des.config=$ES_HOME/conf/elasticsearch.yml -Xms$ES_HEAP_SIZE -Xmx$ES_HEAP_SIZE From 2c8b47b9df7e68f880a2311576a701f13e308255 Mon Sep 17 00:00:00 2001 From: htaox Date: Fri, 9 May 2014 10:18:55 -0400 Subject: [PATCH 45/97] Add README for elasticsearch cluster setup --- README-elasticsearch.md | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100755 README-elasticsearch.md diff --git a/README-elasticsearch.md b/README-elasticsearch.md new file mode 100755 index 0000000..f5d9d48 --- /dev/null +++ b/README-elasticsearch.md @@ -0,0 +1,50 @@ +#### Deploy the HBase fully-distributed cluster + +
+$ NUMBER_OF_DATANODES=3
+$ sudo deploy/deploy_elasticsearch.sh -i htaox/elasticsearch:0.90.13 -w $NUMBER_OF_DATANODES
+
+ +This will (typically) result in the following setup: + +
+NAMESERVER                 10.1.0.3
+ELASTICSEARCH MASTER       10.1.0.4
+ELASTICSEARCH DATANODE     10.1.0.5
+ELASTICSEARCH DATANODE     10.1.0.6
+ELASTICSEARCH DATANODE     10.1.0.7
+
+ +#### Kill the HBase cluster + +
+$ sudo deploy/kill_all.sh elasticsearch
+$ sudo deploy/kill_all.sh nameserver
+
+ +#### After HBase cluster is killed, cleanup +
+$ sudo docker rm `sudo docker ps -a -q`
+$ sudo docker images | grep "" | awk '{print $3}' | xargs sudo docker rmi
+
+ +#### Build locally + +__Download the scripts__ +
+$ git clone -b add-elasticsearch https://github.com/htaox/docker-scripts.git
+
+ +__Change file permissions__ +
    
+$ cd ~/docker-scripts
+$ chmod a+x build/build_all_elasticsearch.sh
+$ chmod a+x elasticsearch-0.90.13/build
+$ chmod a+x deploy/deploy_elasticsearch.sh
+
+ +__Build__ +
    
+$ sudo build/build_all_elasticsearch.sh
+
+ From 96e1ae0e765199d20b4c3a2729a226aecf056fb7 Mon Sep 17 00:00:00 2001 From: htaox Date: Fri, 9 May 2014 10:28:56 -0400 Subject: [PATCH 46/97] Fix typo --- README-elasticsearch.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README-elasticsearch.md b/README-elasticsearch.md index f5d9d48..be71da9 100755 --- a/README-elasticsearch.md +++ b/README-elasticsearch.md @@ -1,4 +1,4 @@ -#### Deploy the HBase fully-distributed cluster +#### Deploy the Elasticsearch distributed cluster
 $ NUMBER_OF_DATANODES=3
@@ -15,14 +15,14 @@ ELASTICSEARCH DATANODE     10.1.0.6
 ELASTICSEARCH DATANODE     10.1.0.7
 
-#### Kill the HBase cluster +#### Kill the Elasticsearch cluster
 $ sudo deploy/kill_all.sh elasticsearch
 $ sudo deploy/kill_all.sh nameserver
 
-#### After HBase cluster is killed, cleanup +#### After Elasticsearch cluster is killed, cleanup
 $ sudo docker rm `sudo docker ps -a -q`
 $ sudo docker images | grep "" | awk '{print $3}' | xargs sudo docker rmi

From e54254af47d825cf4055f0889f2dfe52464651b1 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Fri, 9 May 2014 10:33:25 -0400
Subject: [PATCH 47/97] Added description for Elasticsearch

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c6d0d8a..2b7307b 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,10 @@
 This project was based on a fork from amplab / docker-scripts.
 DNS and cluster setup are all credited to the great work of the amplab team.
 The rest of this file is the original README.md.
-For building & running HBase on top of a Hadoop cluster, please check out [README-hbase.md](https://github.com/htaox/docker-scripts/blob/add-hbase/README-hbase.md)    
+For building & running HBase on top of a Hadoop cluster, please check out [README-hbase.md](https://github.com/htaox/docker-scripts/blob/add-hbase/README-hbase.md)
+
+#### Dockerfiles for distributed Elasticsearch setup
+For building & running an Elasticsearch cluster, please check out [README-elasticsearch.md](https://github.com/htaox/docker-scripts/blob/add-hbase/README-elasticsearch.md)     
 
 # Dockerfiles for Spark and Shark
 

From a64d77e169f33159eecdd7c364571c7ddaf236f4 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sat, 10 May 2014 20:13:41 +0100
Subject: [PATCH 48/97] Initial commit for Faunus cluster setup

---
 build/build_all_faunus.sh                     |  22 ++++
 deploy/deploy_faunus.sh                       | 101 +++++++++++++++
 deploy/start_faunus_cluster.sh                | 120 ++++++++++++++++++
 faunus-0.4.4/build                            |  13 ++
 faunus-0.4.4/faunus-base/Dockerfile           |  22 ++++
 faunus-0.4.4/faunus-base/build                |   4 +
 .../faunus-base/files/configure_faunus.sh     |  36 ++++++
 .../faunus-base/files/log4j.properties        |   8 ++
 faunus-0.4.4/faunus-master/Dockerfile         |   7 +
 faunus-0.4.4/faunus-master/build              |   4 +
 faunus-0.4.4/faunus-master/files/default_cmd  |  25 ++++
 faunus-0.4.4/faunus-worker/Dockerfile         |   9 ++
 faunus-0.4.4/faunus-worker/build              |   4 +
 faunus-0.4.4/faunus-worker/files/default_cmd  |  21 +++
 14 files changed, 396 insertions(+)
 create mode 100644 build/build_all_faunus.sh
 create mode 100644 deploy/deploy_faunus.sh
 create mode 100644 deploy/start_faunus_cluster.sh
 create mode 100644 faunus-0.4.4/build
 create mode 100644 faunus-0.4.4/faunus-base/Dockerfile
 create mode 100644 faunus-0.4.4/faunus-base/build
 create mode 100644 faunus-0.4.4/faunus-base/files/configure_faunus.sh
 create mode 100644 faunus-0.4.4/faunus-base/files/log4j.properties
 create mode 100644 faunus-0.4.4/faunus-master/Dockerfile
 create mode 100644 faunus-0.4.4/faunus-master/build
 create mode 100644 faunus-0.4.4/faunus-master/files/default_cmd
 create mode 100644 faunus-0.4.4/faunus-worker/Dockerfile
 create mode 100644 faunus-0.4.4/faunus-worker/build
 create mode 100644 faunus-0.4.4/faunus-worker/files/default_cmd

diff --git a/build/build_all_faunus.sh b/build/build_all_faunus.sh
new file mode 100644
index 0000000..4c19c44
--- /dev/null
+++ b/build/build_all_faunus.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+if [[ "$USER" != "root" ]]; then
+    echo "please run as: sudo $0"
+    exit 1
+fi
+
+CURDIR=$(pwd)
+BASEDIR=$(cd $(dirname $0); pwd)"/.."
+dir_list=( "faunus-0.4.4" )
+
+export IMAGE_PREFIX="htaox/"
+#"amplab/"
+
+# NOTE: the order matters but this is the right one
+for i in ${dir_list[@]}; do
+	echo building $i;
+	cd ${BASEDIR}/$i
+        cat build
+        ./build
+done
+cd $CURDIR
diff --git a/deploy/deploy_faunus.sh b/deploy/deploy_faunus.sh
new file mode 100644
index 0000000..c3faf4d
--- /dev/null
+++ b/deploy/deploy_faunus.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+DEBUG=0
+BASEDIR=$(cd $(dirname $0); pwd)
+
+faunus_images=( "htaox/faunus:0.4.4")
+NAMESERVER_IMAGE="amplab/dnsmasq-precise"
+
+start_shell=0
+VOLUME_MAP=""
+
+image_type="?"
+image_version="?"
+NUM_WORKERS=2
+
+source $BASEDIR/start_nameserver.sh
+source $BASEDIR/start_faunus_cluster.sh
+
+function check_root() {
+    if [[ "$USER" != "root" ]]; then
+        echo "please run as: sudo $0"
+        exit 1
+    fi
+}
+
+function print_help() {
+    echo "usage: $0 -i  [-w <#workers>] [-v ] [-c]"
+    echo ""
+    echo "  image:    faunus image from:"
+    echo -n "               "
+    for i in ${faunus_images[@]}; do
+        echo -n "  $i"
+    done
+    echo ""    
+}
+
+function parse_options() {
+    while getopts "i:w:cv:h" opt; do
+        case $opt in
+        i)
+            echo "$OPTARG" | grep "faunus:" > /dev/null;
+	    if [ "$?" -eq 0 ]; then
+                image_type="faunus"
+            fi            
+	    image_name=$(echo "$OPTARG" | awk -F ":" '{print $1}')
+            image_version=$(echo "$OPTARG" | awk -F ":" '{print $2}') 
+          ;;
+        w)
+            NUM_WORKERS=$OPTARG
+          ;;
+        h)
+            print_help
+            exit 0
+          ;;
+        c)
+            start_shell=1
+          ;;
+        v)
+            VOLUME_MAP=$OPTARG
+          ;;
+        esac
+    done
+
+    if [ "$image_type" == "?" ]; then
+        echo "missing or invalid option: -i "
+        exit 1
+    fi
+
+    if [ ! "$VOLUME_MAP" == "" ]; then
+        echo "data volume chosen: $VOLUME_MAP"
+        VOLUME_MAP="-v $VOLUME_MAP:/data"
+    fi
+}
+
+check_root
+
+if [[ "$#" -eq 0 ]]; then
+    print_help
+    exit 1
+fi
+
+parse_options $@
+
+if [ "$image_type" == "faunus" ]; then
+    faunus_VERSION="$image_version"
+    echo "*** Starting faunus $faunus_VERSION ***"
+else
+    echo "not starting anything"
+    exit 0
+fi
+
+start_nameserver $NAMESERVER_IMAGE
+wait_for_nameserver
+start_master ${image_name}-master $image_version
+wait_for_master
+
+start_workers ${image_name}-worker $image_version
+echo ""
+print_cluster_info
+#After all the servers are up, we can start the services in sequence
+start_faunus
diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
new file mode 100644
index 0000000..45ccee5
--- /dev/null
+++ b/deploy/start_faunus_cluster.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+MASTER=-1
+MASTER_IP=
+NUM_REGISTERED_WORKERS=0
+BASEDIR=$(cd $(dirname $0); pwd)
+FAUNUSSERVERS="${BASEDIR}/faunusservers"
+
+# starts the Faunus master container
+function start_master() {
+    echo "starting master container"
+    if [ "$DEBUG" -gt 0 ]; then
+        echo sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2
+    fi
+    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2)
+
+    if [ "$MASTER" = "" ]; then
+        echo "error: could not start master container from image $1:$2"
+        exit 1
+    fi
+
+    echo "started master container:      $MASTER"
+    sleep 3
+    MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
+    echo "MASTER_IP:                     $MASTER_IP"
+    echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
+}
+
+# starts a number of Faunus workers
+function start_workers() {
+	
+	rm -f $FAUNUSSERVERS
+
+    for i in `seq 1 $NUM_WORKERS`; do
+        echo "starting worker container"
+	hostname="worker${i}${DOMAINNAME}"
+        if [ "$DEBUG" -gt 0 ]; then
+	    echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP}
+        fi
+	WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP})
+
+        if [ "$WORKER" = "" ]; then
+            echo "error: could not start worker container from image $1:$2"
+            exit 1
+        fi
+
+	echo "started worker container:  $WORKER"
+	sleep 3
+	WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
+	echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE
+    echo "WORKER #${i} IP: $WORKER_IP" 
+    echo $WORKER_IP >> $FAUNUSSERVERS
+    done
+}
+
+# prints out information on the cluster
+function print_cluster_info() {
+    BASEDIR=$(cd $(dirname $0); pwd)"/.."
+    echo ""
+    echo "***********************************************************************"
+    echo ""
+    echo "visit Hadoop Namenode at:   http://$MASTER_IP:50070"
+    echo "ssh into master via:        ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}"
+    echo ""
+    echo "/data mapped:               $VOLUME_MAP"
+    echo ""
+    echo "kill master via:           sudo docker kill $MASTER"
+    echo ""
+	echo "MASTER_IP: ${MASTER_IP}"
+    echo ""
+    echo "WORKERS:"
+    cat -n $FAUNUSSERVERS
+    echo "***********************************************************************"
+    echo ""
+}
+
+function get_num_registered_workers() {
+    sleep 2
+    NUM_REGISTERED_WORKERS=$(($NUM_REGISTERED_WORKERS+1))    
+}
+
+function wait_for_master {
+    echo -n "waiting for master "
+    sleep 1
+    echo ""
+    echo -n "waiting for nameserver to find master "
+    check_hostname result master "$MASTER_IP"
+    until [ "$result" -eq 0 ]; do
+        echo -n "."
+        sleep 1
+        check_hostname result master "$MASTER_IP"
+    done
+    echo ""
+    sleep 2
+}
+
+function start_faunus {
+    
+	chmod 400 $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa
+	
+    #echo -n "updating faunusservers file"
+    #scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=$BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa $FAUNUSSERVERS root@$MASTER_IP:/opt/faunus/conf/
+
+    #echo -n "change faunusservers file permission"
+    #ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "chown hdfs.hdfs /opt/faunus/conf/faunusservers"
+
+    #update the core-site.xml and faunus-site.xml and start hadoop datanodes
+    while read WORKERADDRESS
+    do
+        echo "updating core-site.xml on ${WORKERADDRESS}"
+        ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /etc/hadoop/core-site.xml root@${WORKERADDRESS}:/etc/hadoop/"
+        
+        echo "starting datanode on ${WORKERADDRESS}"
+        ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-datanode start"
+    
+		sleep 2
+	
+    done < $FAUNUSSERVERS
+
+}
diff --git a/faunus-0.4.4/build b/faunus-0.4.4/build
new file mode 100644
index 0000000..3a09ee4
--- /dev/null
+++ b/faunus-0.4.4/build
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+faunus_dirs=$(ls -d faunus*)
+dir_list=("$faunus_dirs")
+
+# NOTE: the order matters but this is the right one
+for i in ${dir_list[@]}; do
+	echo building $i;
+	cd $i;
+	cat build;
+	. build;
+	cd ..;
+done
diff --git a/faunus-0.4.4/faunus-base/Dockerfile b/faunus-0.4.4/faunus-base/Dockerfile
new file mode 100644
index 0000000..58ed602
--- /dev/null
+++ b/faunus-0.4.4/faunus-base/Dockerfile
@@ -0,0 +1,22 @@
+# FAUNUS 0.4.4
+#
+FROM apache-hadoop-hdfs-precise:1.2.1
+MAINTAINER htaox htaox@hotmail.com
+
+# Download and Install HBase
+ENV FAUNUS_VERSION 0.4.4
+
+RUN apt-get install -y -q git maven
+
+RUN git clone https://github.com/Lab41/titan.git
+RUN cd /titan; git checkout dendrite-hadoop2
+RUN mvn install -f /titan/pom.xml -DskipTests
+
+RUN git clone https://github.com/thinkaurelius/faunus.git
+RUN mvn install -f ./faunus/pom.xml -DskipTests
+RUN mv ./faunus /opt/faunus-$FAUNUS_VERSION
+RUN ln -s /opt/faunus-$FAUNUS_VERSION /opt/faunus
+
+ENV FAUNUS_HOME /opt/faunus
+
+ADD files /root/faunus_files 
diff --git a/faunus-0.4.4/faunus-base/build b/faunus-0.4.4/faunus-base/build
new file mode 100644
index 0000000..99b08b4
--- /dev/null
+++ b/faunus-0.4.4/faunus-base/build
@@ -0,0 +1,4 @@
+rm -f files/files.hash
+for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
+mv /tmp/files.hash files/files.hash
+sudo docker build -t ${IMAGE_PREFIX}faunus-base:0.4.4 .
diff --git a/faunus-0.4.4/faunus-base/files/configure_faunus.sh b/faunus-0.4.4/faunus-base/files/configure_faunus.sh
new file mode 100644
index 0000000..5d99235
--- /dev/null
+++ b/faunus-0.4.4/faunus-base/files/configure_faunus.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+source /root/hadoop_files/configure_hadoop.sh
+
+function create_faunus_directories() {
+    create_hadoop_directories
+    rm -rf /opt/faunus-$FAUNUS_VERSION/work
+    mkdir -p /opt/faunus-$FAUNUS_VERSION/work
+    chown hdfs.hdfs /opt/faunus-$FAUNUS_VERSION/work
+    mkdir /tmp/faunus
+    chown hdfs.hdfs /tmp/faunus
+    # this one is for faunus shell logging
+    rm -rf /var/lib/hadoop/hdfs
+    mkdir -p /var/lib/hadoop/hdfs
+    chown hdfs.hdfs /var/lib/hadoop/hdfs
+    #rm -rf /opt/faunus-$FAUNUS_VERSION/logs
+    #mkdir -p /opt/faunus-$FAUNUS_VERSION/logs
+    #chown hdfs.hdfs /opt/faunus-$FAUNUS_VERSION/logs    
+}
+
+function deploy_faunus_files() {
+    deploy_hadoop_files
+    #cp /root/faunus_files/faunus-env.sh /opt/faunus-$FAUNUS_VERSION/conf/
+    #cp /root/faunus_files/log4j.properties /opt/faunus-$FAUNUS_VERSION/conf/
+}		
+
+function configure_faunus() {
+    configure_hadoop $1
+    #sed -i s/__MASTER__/master/ /opt/faunus-$FAUNUS_VERSION/conf/faunus-env.sh
+}
+
+function prepare_faunus() {
+    create_faunus_directories
+    deploy_faunus_files
+    configure_faunus $1
+}
diff --git a/faunus-0.4.4/faunus-base/files/log4j.properties b/faunus-0.4.4/faunus-base/files/log4j.properties
new file mode 100644
index 0000000..d72dbad
--- /dev/null
+++ b/faunus-0.4.4/faunus-base/files/log4j.properties
@@ -0,0 +1,8 @@
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+log4j.logger.org.eclipse.jetty=WARN
diff --git a/faunus-0.4.4/faunus-master/Dockerfile b/faunus-0.4.4/faunus-master/Dockerfile
new file mode 100644
index 0000000..41620af
--- /dev/null
+++ b/faunus-0.4.4/faunus-master/Dockerfile
@@ -0,0 +1,7 @@
+# Faunus
+FROM htaox/faunus-base:0.4.4
+MAINTAINER htaox htaox@hotmail.com
+
+ADD files /root/faunus_master_files
+RUN chmod 700 /root/faunus_master_files/default_cmd
+CMD ["/root/faunus_master_files/default_cmd"]
diff --git a/faunus-0.4.4/faunus-master/build b/faunus-0.4.4/faunus-master/build
new file mode 100644
index 0000000..51ab4b0
--- /dev/null
+++ b/faunus-0.4.4/faunus-master/build
@@ -0,0 +1,4 @@
+rm -f files/files.hash
+for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
+mv /tmp/files.hash files/files.hash
+sudo docker build -t ${IMAGE_PREFIX}faunus-master:0.4.4 .
diff --git a/faunus-0.4.4/faunus-master/files/default_cmd b/faunus-0.4.4/faunus-master/files/default_cmd
new file mode 100644
index 0000000..6e30a6f
--- /dev/null
+++ b/faunus-0.4.4/faunus-master/files/default_cmd
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+env
+
+source /root/faunus_files/configure_faunus.sh
+
+IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
+echo "MASTER_IP=$IP"
+
+echo "preparing Faunus"
+prepare_faunus $IP
+
+echo "starting Hadoop Namenode"
+sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
+service hadoop-namenode start > /dev/null 2>&1
+
+echo "starting sshd"
+/usr/sbin/sshd
+
+echo "starting Faunus Master"
+
+#Spin forever
+while true; do sleep 1000; done
+
+# Don't start Faunus yet.  Need to wait for the datanodes to come up.
diff --git a/faunus-0.4.4/faunus-worker/Dockerfile b/faunus-0.4.4/faunus-worker/Dockerfile
new file mode 100644
index 0000000..d13c5c0
--- /dev/null
+++ b/faunus-0.4.4/faunus-worker/Dockerfile
@@ -0,0 +1,9 @@
+#HBase
+FROM htaox/hbase-base:0.94.18
+MAINTAINER htaox htaox@hotmail.com
+
+ADD files /root/hbase_worker_files
+RUN chmod 700 /root/hbase_worker_files/default_cmd
+# Add the entrypoint script for the master
+CMD ["-h"]
+ENTRYPOINT ["/root/hbase_worker_files/default_cmd"]
diff --git a/faunus-0.4.4/faunus-worker/build b/faunus-0.4.4/faunus-worker/build
new file mode 100644
index 0000000..5122e0a
--- /dev/null
+++ b/faunus-0.4.4/faunus-worker/build
@@ -0,0 +1,4 @@
+rm -f files/files.hash
+for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash
+mv /tmp/files.hash files/files.hash
+sudo docker build -t ${IMAGE_PREFIX}faunus-worker:0.4.4 .
diff --git a/faunus-0.4.4/faunus-worker/files/default_cmd b/faunus-0.4.4/faunus-worker/files/default_cmd
new file mode 100644
index 0000000..d271e8a
--- /dev/null
+++ b/faunus-0.4.4/faunus-worker/files/default_cmd
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+source /root/faunus_files/configure_faunus.sh
+
+IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
+echo "WORKER_IP=$IP"
+
+echo "preparing Faunus"
+prepare_faunus $IP
+
+#echo "starting Hadoop Datanode"
+#service hadoop-datanode start
+
+#Don't start Hadoop yet, need to change core-site.xml from master first
+
+echo "starting sshd"
+/usr/sbin/sshd
+
+while true; do sleep 1000; done
+
+# Don't start Faunus yet.  Need to wait for the datanodes to come up.
\ No newline at end of file

From 0d5241ec81164d9ad4effa7830d426843416e7a4 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 11 May 2014 19:57:44 +0100
Subject: [PATCH 49/97] Added Ubuntu update repos to sources.list

---
 faunus-0.4.4/faunus-base/Dockerfile | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/faunus-0.4.4/faunus-base/Dockerfile b/faunus-0.4.4/faunus-base/Dockerfile
index 58ed602..07e52ac 100644
--- a/faunus-0.4.4/faunus-base/Dockerfile
+++ b/faunus-0.4.4/faunus-base/Dockerfile
@@ -6,11 +6,12 @@ MAINTAINER htaox htaox@hotmail.com
 # Download and Install HBase
 ENV FAUNUS_VERSION 0.4.4
 
-RUN apt-get install -y -q git maven
+RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-security main universe' >> /etc/apt/sources.list
+RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-updates main universe' >> /etc/apt/sources.list
+RUN cat /etc/apt/sources.list
+RUN apt-get update
 
-RUN git clone https://github.com/Lab41/titan.git
-RUN cd /titan; git checkout dendrite-hadoop2
-RUN mvn install -f /titan/pom.xml -DskipTests
+RUN apt-get install -y -q git maven
 
 RUN git clone https://github.com/thinkaurelius/faunus.git
 RUN mvn install -f ./faunus/pom.xml -DskipTests

From 2a990a82451fd1fd92b7124f88f20aa955dbcf64 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 11 May 2014 20:13:12 +0100
Subject: [PATCH 50/97] Just download Faunus package

---
 faunus-0.4.4/faunus-base/Dockerfile | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/faunus-0.4.4/faunus-base/Dockerfile b/faunus-0.4.4/faunus-base/Dockerfile
index 07e52ac..e347169 100644
--- a/faunus-0.4.4/faunus-base/Dockerfile
+++ b/faunus-0.4.4/faunus-base/Dockerfile
@@ -6,16 +6,21 @@ MAINTAINER htaox htaox@hotmail.com
 # Download and Install HBase
 ENV FAUNUS_VERSION 0.4.4
 
-RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-security main universe' >> /etc/apt/sources.list
-RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-updates main universe' >> /etc/apt/sources.list
-RUN cat /etc/apt/sources.list
-RUN apt-get update
+# RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-security main universe' >> /etc/apt/sources.list
+# RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-updates main universe' >> /etc/apt/sources.list
+# RUN cat /etc/apt/sources.list
+# RUN apt-get update
+# RUN apt-get install -y -q git maven
+# RUN git clone https://github.com/thinkaurelius/faunus.git
+# RUN mvn install -f ./faunus/pom.xml -DskipTests
+# RUN mv ./faunus /opt/faunus-$FAUNUS_VERSION
 
-RUN apt-get install -y -q git maven
+RUN apt-get install -y unzip
+
+RUN wget http://s3.thinkaurelius.com/downloads/faunus/faunus-$FAUNUS_VERSION.zip
+RUN unzip faunus-$FAUNUS_VERSION.zip 
+RUN mv faunus-$FAUNUS_VERSION /opt/
 
-RUN git clone https://github.com/thinkaurelius/faunus.git
-RUN mvn install -f ./faunus/pom.xml -DskipTests
-RUN mv ./faunus /opt/faunus-$FAUNUS_VERSION
 RUN ln -s /opt/faunus-$FAUNUS_VERSION /opt/faunus
 
 ENV FAUNUS_HOME /opt/faunus

From 172c4d9e15b6ce342dea1ad1dabd8d3523e0c65b Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 11 May 2014 20:51:10 +0100
Subject: [PATCH 51/97] Worker was referencing wrong base image

---
 faunus-0.4.4/faunus-worker/Dockerfile | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/faunus-0.4.4/faunus-worker/Dockerfile b/faunus-0.4.4/faunus-worker/Dockerfile
index d13c5c0..95be404 100644
--- a/faunus-0.4.4/faunus-worker/Dockerfile
+++ b/faunus-0.4.4/faunus-worker/Dockerfile
@@ -1,9 +1,8 @@
 #HBase
-FROM htaox/hbase-base:0.94.18
+FROM htaox/faunus-base:0.4.4
 MAINTAINER htaox htaox@hotmail.com
 
 ADD files /root/hbase_worker_files
 RUN chmod 700 /root/hbase_worker_files/default_cmd
-# Add the entrypoint script for the master
-CMD ["-h"]
-ENTRYPOINT ["/root/hbase_worker_files/default_cmd"]
+# Add the entrypoint script for the worker
+CMD ["/root/hbase_worker_files/default_cmd"]

From d751fba89483a3ca13c45bfe80040f310756b876 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 11 May 2014 20:57:26 +0100
Subject: [PATCH 52/97] Typo in worker Dockerfile

---
 faunus-0.4.4/faunus-worker/Dockerfile        | 7 +++----
 faunus-0.4.4/faunus-worker/files/default_cmd | 4 ++++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/faunus-0.4.4/faunus-worker/Dockerfile b/faunus-0.4.4/faunus-worker/Dockerfile
index 95be404..681da98 100644
--- a/faunus-0.4.4/faunus-worker/Dockerfile
+++ b/faunus-0.4.4/faunus-worker/Dockerfile
@@ -2,7 +2,6 @@
 FROM htaox/faunus-base:0.4.4
 MAINTAINER htaox htaox@hotmail.com
 
-ADD files /root/hbase_worker_files
-RUN chmod 700 /root/hbase_worker_files/default_cmd
-# Add the entrypoint script for the worker
-CMD ["/root/hbase_worker_files/default_cmd"]
+ADD files /root/faunus_worker_files
+RUN chmod 700 /root/faunus_worker_files/default_cmd
+CMD ["/root/faunus_worker_files/default_cmd"]
diff --git a/faunus-0.4.4/faunus-worker/files/default_cmd b/faunus-0.4.4/faunus-worker/files/default_cmd
index d271e8a..4a17f3f 100644
--- a/faunus-0.4.4/faunus-worker/files/default_cmd
+++ b/faunus-0.4.4/faunus-worker/files/default_cmd
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+env
+
 source /root/faunus_files/configure_faunus.sh
 
 IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }')
@@ -16,6 +18,8 @@ prepare_faunus $IP
 echo "starting sshd"
 /usr/sbin/sshd
 
+echo "starting Faunus Worker"
+
 while true; do sleep 1000; done
 
 # Don't start Faunus yet.  Need to wait for the datanodes to come up.
\ No newline at end of file

From 361371268b9eb71c2f73f7d10aeab216ef93a3e5 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 11 May 2014 21:19:36 +0100
Subject: [PATCH 53/97] Modified arguments for docker run worker

---
 deploy/start_faunus_cluster.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index 45ccee5..1fa9ffc 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -35,9 +35,9 @@ function start_workers() {
         echo "starting worker container"
 	hostname="worker${i}${DOMAINNAME}"
         if [ "$DEBUG" -gt 0 ]; then
-	    echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP}
+	    echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2
         fi
-	WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP})
+	WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2)
 
         if [ "$WORKER" = "" ]; then
             echo "error: could not start worker container from image $1:$2"

From d339e0e9a5127bcb7c1ac18dcff370486dd32d3c Mon Sep 17 00:00:00 2001
From: htaox 
Date: Fri, 16 May 2014 13:19:32 -0400
Subject: [PATCH 54/97] Use IPv4 for Zookeeper & HBase

---
 hbase-0.94.18/hbase-base/Dockerfile         |   2 +-
 hbase-0.94.18/hbase-base/files/hbase-env.sh |   2 +-
 hbase-0.94.18/hbase-base/files/zkServer.sh  | 186 ++++++++++++++++++++
 3 files changed, 188 insertions(+), 2 deletions(-)
 create mode 100755 hbase-0.94.18/hbase-base/files/zkServer.sh

diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile
index acad500..f90c8fa 100644
--- a/hbase-0.94.18/hbase-base/Dockerfile
+++ b/hbase-0.94.18/hbase-base/Dockerfile
@@ -43,6 +43,7 @@ ENV PATH $PATH:$ZOO_HOME/bin
 
 ADD files/zoo.cfg $ZOO_HOME/conf/zoo.cfg
 ADD files/zkEnv.sh $ZOO_HOME/bin/zkEnv.sh
+ADD files/zkServer.sh $ZOO_HOME/bin/zkServer.sh
 
 # Ports reference
 # http://blog.cloudera.com/blog/2013/07/guide-to-using-apache-hbase-ports/
@@ -64,4 +65,3 @@ EXPOSE 8080 8085
 EXPOSE 9090 9095
 
 ADD files /root/hbase_files
-
diff --git a/hbase-0.94.18/hbase-base/files/hbase-env.sh b/hbase-0.94.18/hbase-base/files/hbase-env.sh
index fe4ceee..d862662 100644
--- a/hbase-0.94.18/hbase-base/files/hbase-env.sh
+++ b/hbase-0.94.18/hbase-base/files/hbase-env.sh
@@ -38,7 +38,7 @@
 # Below are what we set by default.  May only work with SUN JVM.
 # For more on why as well as other possible settings,
 # see http://wiki.apache.org/hadoop/PerformanceTuning
-export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
+export HBASE_OPTS="-XX:+UseConcMarkSweepGC -Djava.net.preferIPv4Stack=true"
 
 # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.
 
diff --git a/hbase-0.94.18/hbase-base/files/zkServer.sh b/hbase-0.94.18/hbase-base/files/zkServer.sh
new file mode 100755
index 0000000..7bae28c
--- /dev/null
+++ b/hbase-0.94.18/hbase-base/files/zkServer.sh
@@ -0,0 +1,186 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# If this scripted is run out of /usr/bin or some other system bin directory
+# it should be linked to and not copied. Things like java jar files are found
+# relative to the canonical path of this script.
+#
+
+# See the following page for extensive details on setting
+# up the JVM to accept JMX remote management:
+# http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html
+# by default we allow local JMX connections
+if [ "x$JMXLOCALONLY" = "x" ]
+then
+    JMXLOCALONLY=false
+fi
+
+if [ "x$JMXDISABLE" = "x" ]
+then
+    echo "JMX enabled by default" >&2
+    # for some reason these two options are necessary on jdk6 on Ubuntu
+    #   accord to the docs they are not necessary, but otw jconsole cannot
+    #   do a local attach
+    ZOOMAIN="-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=$JMXLOCALONLY org.apache.zookeeper.server.quorum.QuorumPeerMain"
+else
+    echo "JMX disabled by user request" >&2
+    ZOOMAIN="org.apache.zookeeper.server.quorum.QuorumPeerMain"
+fi
+
+# use POSTIX interface, symlink is followed automatically
+ZOOBIN="${BASH_SOURCE-$0}"
+ZOOBIN="$(dirname "${ZOOBIN}")"
+ZOOBINDIR="$(cd "${ZOOBIN}"; pwd)"
+
+if [ -e "$ZOOBIN/../libexec/zkEnv.sh" ]; then
+  . "$ZOOBINDIR/../libexec/zkEnv.sh"
+else
+  . "$ZOOBINDIR/zkEnv.sh"
+fi
+
+if [ "x$SERVER_JVMFLAGS"  != "x" ]
+then
+    JVMFLAGS="$SERVER_JVMFLAGS $JVMFLAGS"
+fi
+
+if [ "x$2" != "x" ]
+then
+    ZOOCFG="$ZOOCFGDIR/$2"
+fi
+
+# if we give a more complicated path to the config, don't screw around in $ZOOCFGDIR
+if [ "x$(dirname "$ZOOCFG")" != "x$ZOOCFGDIR" ]
+then
+    ZOOCFG="$2"
+fi
+
+if $cygwin
+then
+    ZOOCFG=`cygpath -wp "$ZOOCFG"`
+    # cygwin has a "kill" in the shell itself, gets confused
+    KILL=/bin/kill
+else
+    KILL=kill
+fi
+
+echo "Using config: $ZOOCFG" >&2
+
+if [ -z "$ZOOPIDFILE" ]; then
+    ZOO_DATADIR="$(grep "^[[:space:]]*dataDir" "$ZOOCFG" | sed -e 's/.*=//')"
+    if [ ! -d "$ZOO_DATADIR" ]; then
+        mkdir -p "$ZOO_DATADIR"
+    fi
+    ZOOPIDFILE="$ZOO_DATADIR/zookeeper_server.pid"
+else
+    # ensure it exists, otw stop will fail
+    mkdir -p "$(dirname "$ZOOPIDFILE")"
+fi
+
+if [ ! -w "$ZOO_LOG_DIR" ] ; then
+mkdir -p "$ZOO_LOG_DIR"
+fi
+
+_ZOO_DAEMON_OUT="$ZOO_LOG_DIR/zookeeper.out"
+
+case $1 in
+start)
+    echo  -n "Starting zookeeper ... "
+    if [ -f "$ZOOPIDFILE" ]; then
+      if kill -0 `cat "$ZOOPIDFILE"` > /dev/null 2>&1; then
+         echo $command already running as process `cat "$ZOOPIDFILE"`. 
+         exit 0
+      fi
+    fi
+    nohup "$JAVA" "-Djava.net.preferIPv4Stack=true" "-Dzookeeper.log.dir=${ZOO_LOG_DIR}" "-Dzookeeper.root.logger=${ZOO_LOG4J_PROP}" \
+    -cp "$CLASSPATH" $JVMFLAGS $ZOOMAIN "$ZOOCFG" > "$_ZOO_DAEMON_OUT" 2>&1 < /dev/null &
+    if [ $? -eq 0 ]
+    then
+      if /bin/echo -n $! > "$ZOOPIDFILE"
+      then
+        sleep 1
+        echo STARTED
+      else
+        echo FAILED TO WRITE PID
+        exit 1
+      fi
+    else
+      echo SERVER DID NOT START
+      exit 1
+    fi
+    ;;
+start-foreground)
+    ZOO_CMD=(exec "$JAVA")
+    if [ "${ZOO_NOEXEC}" != "" ]; then
+      ZOO_CMD=("$JAVA")
+    fi
+    "${ZOO_CMD[@]}" "-Djava.net.preferIPv4Stack=true" "-Dzookeeper.log.dir=${ZOO_LOG_DIR}" "-Dzookeeper.root.logger=${ZOO_LOG4J_PROP}" \
+    -cp "$CLASSPATH" $JVMFLAGS $ZOOMAIN "$ZOOCFG"
+    ;;
+print-cmd)
+    echo "\"$JAVA\" -Dzookeeper.log.dir=\"${ZOO_LOG_DIR}\" -Dzookeeper.root.logger=\"${ZOO_LOG4J_PROP}\" -cp \"$CLASSPATH\" $JVMFLAGS $ZOOMAIN \"$ZOOCFG\" > \"$_ZOO_DAEMON_OUT\" 2>&1 < /dev/null"
+    ;;
+stop)
+    echo -n "Stopping zookeeper ... "
+    if [ ! -f "$ZOOPIDFILE" ]
+    then
+      echo "no zookeeper to stop (could not find file $ZOOPIDFILE)"
+    else
+      $KILL -9 $(cat "$ZOOPIDFILE")
+      rm "$ZOOPIDFILE"
+      echo STOPPED
+    fi
+    exit 0
+    ;;
+upgrade)
+    shift
+    echo "upgrading the servers to 3.*"
+    "$JAVA" "-Dzookeeper.log.dir=${ZOO_LOG_DIR}" "-Dzookeeper.root.logger=${ZOO_LOG4J_PROP}" \
+    -cp "$CLASSPATH" $JVMFLAGS org.apache.zookeeper.server.upgrade.UpgradeMain ${@}
+    echo "Upgrading ... "
+    ;;
+restart)
+    shift
+    "$0" stop ${@}
+    sleep 3
+    "$0" start ${@}
+    ;;
+status)
+    # -q is necessary on some versions of linux where nc returns too quickly, and no stat result is output
+    clientPortAddress=`grep "^[[:space:]]*clientPortAddress[^[:alpha:]]" "$ZOOCFG" | sed -e 's/.*=//'`
+    if ! [ $clientPortAddress ]
+    then
+	clientPortAddress="localhost"
+    fi
+    clientPort=`grep "^[[:space:]]*clientPort[^[:alpha:]]" "$ZOOCFG" | sed -e 's/.*=//'`
+    STAT=`"$JAVA" "-Dzookeeper.log.dir=${ZOO_LOG_DIR}" "-Dzookeeper.root.logger=${ZOO_LOG4J_PROP}" \
+             -cp "$CLASSPATH" $JVMFLAGS org.apache.zookeeper.client.FourLetterWordMain \
+             $clientPortAddress $clientPort srvr 2> /dev/null    \
+          | grep Mode`
+    if [ "x$STAT" = "x" ]
+    then
+        echo "Error contacting service. It is probably not running."
+        exit 1
+    else
+        echo $STAT
+        exit 0
+    fi
+    ;;
+*)
+    echo "Usage: $0 {start|start-foreground|stop|restart|status|upgrade|print-cmd}" >&2
+
+esac

From 903ba22e72100d5d92be4be8e684c4268c018d83 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 18 May 2014 12:40:45 +0100
Subject: [PATCH 55/97] Faunus cluster needs to use the same nameserver as
 HBase cluster

---
 deploy/deploy_faunus.sh                      | 10 +++++++---
 deploy/start_faunus_cluster.sh               | 10 +++++-----
 faunus-0.4.4/faunus-master/files/default_cmd |  9 +++++++++
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/deploy/deploy_faunus.sh b/deploy/deploy_faunus.sh
index c3faf4d..dbd92ef 100644
--- a/deploy/deploy_faunus.sh
+++ b/deploy/deploy_faunus.sh
@@ -13,7 +13,7 @@ image_type="?"
 image_version="?"
 NUM_WORKERS=2
 
-source $BASEDIR/start_nameserver.sh
+# source $BASEDIR/start_nameserver.sh
 source $BASEDIR/start_faunus_cluster.sh
 
 function check_root() {
@@ -89,8 +89,12 @@ else
     exit 0
 fi
 
-start_nameserver $NAMESERVER_IMAGE
-wait_for_nameserver
+# start_nameserver $NAMESERVER_IMAGE
+# wait_for_nameserver
+
+# The nameserver should be the one used for the HBase cluster
+NAMESERVER_IP=172.17.0.2
+
 start_master ${image_name}-master $image_version
 wait_for_master
 
diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index 1fa9ffc..27cb1af 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -10,9 +10,9 @@ FAUNUSSERVERS="${BASEDIR}/faunusservers"
 function start_master() {
     echo "starting master container"
     if [ "$DEBUG" -gt 0 ]; then
-        echo sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2
+        echo sudo docker run -d --dns $NAMESERVER_IP -h faunus-master${DOMAINNAME} $VOLUME_MAP $1:$2
     fi
-    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2)
+    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h faunus-master${DOMAINNAME} $VOLUME_MAP $1:$2)
 
     if [ "$MASTER" = "" ]; then
         echo "error: could not start master container from image $1:$2"
@@ -33,7 +33,7 @@ function start_workers() {
 
     for i in `seq 1 $NUM_WORKERS`; do
         echo "starting worker container"
-	hostname="worker${i}${DOMAINNAME}"
+	hostname="faunus-worker${i}${DOMAINNAME}"
         if [ "$DEBUG" -gt 0 ]; then
 	    echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2
         fi
@@ -84,11 +84,11 @@ function wait_for_master {
     sleep 1
     echo ""
     echo -n "waiting for nameserver to find master "
-    check_hostname result master "$MASTER_IP"
+    check_hostname result faunus-master "$MASTER_IP"
     until [ "$result" -eq 0 ]; do
         echo -n "."
         sleep 1
-        check_hostname result master "$MASTER_IP"
+        check_hostname result faunus-master "$MASTER_IP"
     done
     echo ""
     sleep 2
diff --git a/faunus-0.4.4/faunus-master/files/default_cmd b/faunus-0.4.4/faunus-master/files/default_cmd
index 6e30a6f..77e38c9 100644
--- a/faunus-0.4.4/faunus-master/files/default_cmd
+++ b/faunus-0.4.4/faunus-master/files/default_cmd
@@ -19,6 +19,15 @@ echo "starting sshd"
 
 echo "starting Faunus Master"
 
+# Hack to update /etc/hosts
+# http://stackoverflow.com/questions/19414543/how-can-i-make-etc-hosts-writable-by-root-in-a-docker-container
+#ADD your_hosts_file /tmp/hosts
+# cp /etc/hosts /etc/hosts.bak
+# echo 172.17.0.3 >> /etc/hosts.bak
+#RUN mkdir -p -- /lib-override && cp /lib/x86_64-linux-gnu/libnss_files.so.2 /lib-override
+#RUN perl -pi -e 's:/etc/hosts:/etc/hosts.bak:g' /lib-override/libnss_files.so.2
+#ENV LD_LIBRARY_PATH /lib-override
+
 #Spin forever
 while true; do sleep 1000; done
 

From 383e840ea3dff837341210c4716e376ea77d690f Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 18 May 2014 12:50:36 +0100
Subject: [PATCH 56/97] Add back start_nameserver.sh source reference

---
 deploy/deploy_faunus.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deploy/deploy_faunus.sh b/deploy/deploy_faunus.sh
index dbd92ef..9bf4d7e 100644
--- a/deploy/deploy_faunus.sh
+++ b/deploy/deploy_faunus.sh
@@ -13,7 +13,7 @@ image_type="?"
 image_version="?"
 NUM_WORKERS=2
 
-# source $BASEDIR/start_nameserver.sh
+source $BASEDIR/start_nameserver.sh
 source $BASEDIR/start_faunus_cluster.sh
 
 function check_root() {

From 92af6f64b02d11804935ad61b4a6ea139b824a93 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 18 May 2014 12:55:47 +0100
Subject: [PATCH 57/97] There is no DNS file to write to

---
 deploy/start_faunus_cluster.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index 27cb1af..01c3f04 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -23,7 +23,7 @@ function start_master() {
     sleep 3
     MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
     echo "MASTER_IP:                     $MASTER_IP"
-    echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
+    # echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
 }
 
 # starts a number of Faunus workers
@@ -47,7 +47,7 @@ function start_workers() {
 	echo "started worker container:  $WORKER"
 	sleep 3
 	WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
-	echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE
+	# echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE
     echo "WORKER #${i} IP: $WORKER_IP" 
     echo $WORKER_IP >> $FAUNUSSERVERS
     done

From f7677fcb016e67575005e9e39914cff7378b20cd Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 18 May 2014 12:59:19 +0100
Subject: [PATCH 58/97] Just sleep when waiting for master

---
 deploy/start_faunus_cluster.sh | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index 01c3f04..0bb9430 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -81,17 +81,7 @@ function get_num_registered_workers() {
 
 function wait_for_master {
     echo -n "waiting for master "
-    sleep 1
-    echo ""
-    echo -n "waiting for nameserver to find master "
-    check_hostname result faunus-master "$MASTER_IP"
-    until [ "$result" -eq 0 ]; do
-        echo -n "."
-        sleep 1
-        check_hostname result faunus-master "$MASTER_IP"
-    done
-    echo ""
-    sleep 2
+    sleep 3    
 }
 
 function start_faunus {

From b57ac07e742164606848e8094de544783704c556 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 18 May 2014 21:35:34 +0100
Subject: [PATCH 59/97] Start only one dnsmasq service

---
 deploy/deploy_hbase.sh        | 16 ++++++++++++++--
 deploy/start_hbase_cluster.sh | 12 +++++++-----
 deploy/start_nameserver.sh    |  4 +++-
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh
index bb4bdba..f45b7d0 100755
--- a/deploy/deploy_hbase.sh
+++ b/deploy/deploy_hbase.sh
@@ -72,6 +72,12 @@ function parse_options() {
     fi
 }
 
+function check_nameserver() {
+    nameserver=$(sudo docker ps | grep dnsmasq_files | awk '{print $1}' | tr '\n' ' ')
+    containers=($nameserver)
+    NUM_NAMESERVERS=$(echo ${#containers[@]})
+}
+
 check_root
 
 if [[ "$#" -eq 0 ]]; then
@@ -89,8 +95,14 @@ else
     exit 0
 fi
 
-start_nameserver $NAMESERVER_IMAGE
-wait_for_nameserver
+check_nameserver
+
+if [ "$NUM_NAMESERVERS" -eq 0 ]; then
+    rm -rf $BASEDIR/0hosts
+    start_nameserver $NAMESERVER_IMAGE
+    wait_for_nameserver
+fi
+
 start_master ${image_name}-master $image_version
 wait_for_master
 
diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh
index 1f0d49a..ba1b9f0 100755
--- a/deploy/start_hbase_cluster.sh
+++ b/deploy/start_hbase_cluster.sh
@@ -5,14 +5,16 @@ MASTER_IP=
 NUM_REGISTERED_WORKERS=0
 BASEDIR=$(cd $(dirname $0); pwd)
 REGIONSERVERS="${BASEDIR}/regionservers"
+MASTER_HOSTNAME=hbase-master
+WORKER_HOSTNAME=hbase-worker
 
 # starts the Spark/Shark master container
 function start_master() {
     echo "starting master container"
     if [ "$DEBUG" -gt 0 ]; then
-        echo sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2
+        echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2
     fi
-    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2)
+    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2)
 
     if [ "$MASTER" = "" ]; then
         echo "error: could not start master container from image $1:$2"
@@ -33,7 +35,7 @@ function start_workers() {
 
     for i in `seq 1 $NUM_WORKERS`; do
         echo "starting worker container"
-	hostname="worker${i}${DOMAINNAME}"
+	hostname="${WORKER_HOSTNAME}${i}${DOMAINNAME}"
         if [ "$DEBUG" -gt 0 ]; then
 	    echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 ${MASTER_IP}
         fi
@@ -81,11 +83,11 @@ function wait_for_master {
     sleep 1
     echo ""
     echo -n "waiting for nameserver to find master "
-    check_hostname result master "$MASTER_IP"
+    check_hostname result "$MASTER_HOSTNAME" "$MASTER_IP"
     until [ "$result" -eq 0 ]; do
         echo -n "."
         sleep 1
-        check_hostname result master "$MASTER_IP"
+        check_hostname result "$MASTER_HOSTNAME" "$MASTER_IP"
     done
     echo ""
     sleep 2
diff --git a/deploy/start_nameserver.sh b/deploy/start_nameserver.sh
index 59358ca..3342450 100755
--- a/deploy/start_nameserver.sh
+++ b/deploy/start_nameserver.sh
@@ -4,10 +4,12 @@ NAMESERVER=-1
 NAMESERVER_IP=
 DOMAINNAME=
 #".mycluster.com"
+BASEDIR=$(cd $(dirname $0); pwd)
 
 # starts the dnsmasq nameserver
 function start_nameserver() {
-    DNSDIR="/tmp/dnsdir_$RANDOM"
+    #DNSDIR="/tmp/dnsdir_$RANDOM"
+    DNSDIR="${BASEDIR}"
     DNSFILE="${DNSDIR}/0hosts"
     mkdir $DNSDIR
 

From 15df5bb21dd805e4ab242d42574458c65f6c0d3e Mon Sep 17 00:00:00 2001
From: htaox 
Date: Mon, 19 May 2014 16:59:45 -0400
Subject: [PATCH 60/97] Allow only one HBase cluster per nameserver

---
 deploy/deploy_hbase.sh        | 19 ++++++++++---------
 deploy/start_hbase_cluster.sh |  2 +-
 deploy/start_nameserver.sh    | 30 ++++++++++++++++++++++++++++--
 3 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh
index f45b7d0..c121d19 100755
--- a/deploy/deploy_hbase.sh
+++ b/deploy/deploy_hbase.sh
@@ -72,10 +72,11 @@ function parse_options() {
     fi
 }
 
-function check_nameserver() {
-    nameserver=$(sudo docker ps | grep dnsmasq_files | awk '{print $1}' | tr '\n' ' ')
-    containers=($nameserver)
-    NUM_NAMESERVERS=$(echo ${#containers[@]})
+function check_hbase() {
+
+    containers=$(sudo docker ps | grep hbase-master | awk '{print $1}' | tr '\n' ' ')
+    NUM_HBASE_MASTER=$(echo ${#containers[@]})
+    echo "There are $NUM_HBASE_MASTER HBase servers"
 }
 
 check_root
@@ -95,12 +96,12 @@ else
     exit 0
 fi
 
-check_nameserver
+check_start_nameserver $NAMESERVER_IMAGE
+
+check_hbase
 
-if [ "$NUM_NAMESERVERS" -eq 0 ]; then
-    rm -rf $BASEDIR/0hosts
-    start_nameserver $NAMESERVER_IMAGE
-    wait_for_nameserver
+if [ $NUM_HBASE_MASTER -gt 0 ]; then
+    exit 0
 fi
 
 start_master ${image_name}-master $image_version
diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh
index ba1b9f0..a5e4f8d 100755
--- a/deploy/start_hbase_cluster.sh
+++ b/deploy/start_hbase_cluster.sh
@@ -25,7 +25,7 @@ function start_master() {
     sleep 3
     MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
     echo "MASTER_IP:                     $MASTER_IP"
-    echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
+    echo "address=\"/$MASTER_HOSTNAME/$MASTER_IP\"" >> $DNSFILE
 }
 
 # starts a number of Spark/Shark workers
diff --git a/deploy/start_nameserver.sh b/deploy/start_nameserver.sh
index 3342450..9024941 100755
--- a/deploy/start_nameserver.sh
+++ b/deploy/start_nameserver.sh
@@ -8,11 +8,14 @@ BASEDIR=$(cd $(dirname $0); pwd)
 
 # starts the dnsmasq nameserver
 function start_nameserver() {
-    #DNSDIR="/tmp/dnsdir_$RANDOM"
-    DNSDIR="${BASEDIR}"
+    DNSDIR="/tmp/dnsdir_$RANDOM"
+    #DNSDIR="${BASEDIR}"
     DNSFILE="${DNSDIR}/0hosts"
     mkdir $DNSDIR
 
+    rm -rf $BASEDIR/DNSMASQ
+    echo $DNSFILE > "${BASEDIR}/DNSMASQ" 
+
     echo "starting nameserver container"
     if [ "$DEBUG" -gt 0 ]; then
         echo sudo docker run -d -h nameserver${DOMAINNAME} -v $DNSDIR:/etc/dnsmasq.d $1
@@ -76,3 +79,26 @@ function wait_for_nameserver {
     done
     echo ""
 }
+
+function check_nameserver() {
+    nameservers=$(sudo docker ps | grep dnsmasq_files | awk '{print $1}' | tr '\n' ' ')
+    containers=($nameservers)
+    NUM_NAMESERVERS=$(echo ${#containers[@]})
+    echo "There are $NUM_NAMESERVERS nameservers"
+}
+
+function check_start_nameserver() {
+
+    check_nameserver
+
+    if [ "$NUM_NAMESERVERS" -eq 0 ]; then
+        start_nameserver $1
+        # start_nameserver $NAMESERVER_IMAGE
+        wait_for_nameserver
+    else
+        HOSTFILE=$(cat $BASEDIR/DNSMASQ)
+        DNSFILE=$HOSTFILE
+        NAMESERVER_IP=$(cat $HOSTFILE | grep nameserver | grep -oE "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}")
+        echo "NAMESERVER_IP: $NAMESERVER_IP"        
+    fi    
+}

From 66103d7eaadb6c055b25b1b7e12d49a57e1963e8 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Mon, 19 May 2014 17:18:15 -0400
Subject: [PATCH 61/97] Forgot parentheses

---
 deploy/deploy_hbase.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/deploy/deploy_hbase.sh b/deploy/deploy_hbase.sh
index c121d19..e871a1b 100755
--- a/deploy/deploy_hbase.sh
+++ b/deploy/deploy_hbase.sh
@@ -74,9 +74,10 @@ function parse_options() {
 
 function check_hbase() {
 
-    containers=$(sudo docker ps | grep hbase-master | awk '{print $1}' | tr '\n' ' ')
-    NUM_HBASE_MASTER=$(echo ${#containers[@]})
-    echo "There are $NUM_HBASE_MASTER HBase servers"
+    containers=($(sudo docker ps | grep hbase-master | awk '{print $1}' | tr '\n' ' '))
+    NUM_HBASE_MASTER=$(echo ${#containers[@]})    
+    echo "There are $NUM_HBASE_MASTER HBase servers running"
+
 }
 
 check_root

From 0c1e021d7bc183a2ed76067d0a2f3a1b4e047cda Mon Sep 17 00:00:00 2001
From: htaox 
Date: Tue, 20 May 2014 16:35:58 -0400
Subject: [PATCH 62/97] Limit one elasticsearch cluster per nameserver

---
 deploy/deploy_elasticsearch.sh        | 17 +++++++++++++++--
 deploy/start_elasticsearch_cluster.sh | 14 ++++++++------
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/deploy/deploy_elasticsearch.sh b/deploy/deploy_elasticsearch.sh
index 4e58b52..e48595f 100755
--- a/deploy/deploy_elasticsearch.sh
+++ b/deploy/deploy_elasticsearch.sh
@@ -72,6 +72,14 @@ function parse_options() {
     fi
 }
 
+function check_elasticsearch() {
+
+    containers=($(sudo docker ps | grep elasticsearch-master | awk '{print $1}' | tr '\n' ' '))
+    NUM_ELASTIC_MASTER=$(echo ${#containers[@]})    
+    echo "There are $NUM_ELASTIC_MASTER Elasticsearch servers running"
+
+}
+
 check_root
 
 if [[ "$#" -eq 0 ]]; then
@@ -89,8 +97,13 @@ else
     exit 0
 fi
 
-start_nameserver $NAMESERVER_IMAGE
-wait_for_nameserver
+check_start_nameserver $NAMESERVER_IMAGE
+
+check_elasticsearch
+
+if [ $NUM_ELASTIC_MASTER -gt 0 ]; then
+    exit 0
+fi
 
 start_master ${image_name}-master $image_version
 wait_for_master
diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh
index 59110f6..4ba59a8 100755
--- a/deploy/start_elasticsearch_cluster.sh
+++ b/deploy/start_elasticsearch_cluster.sh
@@ -5,14 +5,16 @@ MASTER_IP=
 NUM_REGISTERED_WORKERS=0
 BASEDIR=$(cd $(dirname $0); pwd)
 ELASTICSERVERS="${BASEDIR}/elasticservers"
+MASTER_HOSTNAME=elasticsearch-master
+WORKER_HOSTNAME=elasticsearch-worker
 
 # starts the elasticsearch master container
 function start_master() {
     echo "starting master container"
     if [ "$DEBUG" -gt 0 ]; then
-        echo sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2
+        echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2
     fi
-    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h master${DOMAINNAME} $VOLUME_MAP $1:$2)
+    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2)
 
     if [ "$MASTER" = "" ]; then
         echo "error: could not start master container from image $1:$2"
@@ -23,7 +25,7 @@ function start_master() {
     sleep 3
     MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
     echo "MASTER_IP:                     $MASTER_IP"
-    echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
+    echo "address=\"/$MASTER_HOSTNAME/$MASTER_IP\"" >> $DNSFILE
 }
 
 # starts a number of elasticsearch workers
@@ -33,7 +35,7 @@ function start_workers() {
 
     for i in `seq 1 $NUM_WORKERS`; do
         echo "starting worker container"
-	hostname="worker${i}${DOMAINNAME}"
+	hostname="${WORKER_HOSTNAME}${i}${DOMAINNAME}"
         if [ "$DEBUG" -gt 0 ]; then
 	    echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2
         fi
@@ -84,11 +86,11 @@ function wait_for_master {
     sleep 1
     echo ""
     echo -n "waiting for nameserver to find master "
-    check_hostname result master "$MASTER_IP"
+    check_hostname result "$MASTER_HOSTNAME" "$MASTER_IP"
     until [ "$result" -eq 0 ]; do
         echo -n "."
         sleep 1
-        check_hostname result master "$MASTER_IP"
+        check_hostname result "$MASTER_HOSTNAME" "$MASTER_IP"
     done
     echo ""
     sleep 2

From 453e0839fa723db4ec75df715f5c3278ab47e44b Mon Sep 17 00:00:00 2001
From: htaox 
Date: Tue, 20 May 2014 16:45:15 -0400
Subject: [PATCH 63/97] Change location for DNSMASQ file

---
 deploy/start_nameserver.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/deploy/start_nameserver.sh b/deploy/start_nameserver.sh
index 9024941..07f70c9 100755
--- a/deploy/start_nameserver.sh
+++ b/deploy/start_nameserver.sh
@@ -13,8 +13,8 @@ function start_nameserver() {
     DNSFILE="${DNSDIR}/0hosts"
     mkdir $DNSDIR
 
-    rm -rf $BASEDIR/DNSMASQ
-    echo $DNSFILE > "${BASEDIR}/DNSMASQ" 
+    rm -rf /tmp/DNSMASQ
+    echo $DNSFILE > "/tmp/DNSMASQ" 
 
     echo "starting nameserver container"
     if [ "$DEBUG" -gt 0 ]; then
@@ -96,7 +96,7 @@ function check_start_nameserver() {
         # start_nameserver $NAMESERVER_IMAGE
         wait_for_nameserver
     else
-        HOSTFILE=$(cat $BASEDIR/DNSMASQ)
+        HOSTFILE=$(cat /tmp/DNSMASQ)
         DNSFILE=$HOSTFILE
         NAMESERVER_IP=$(cat $HOSTFILE | grep nameserver | grep -oE "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}")
         echo "NAMESERVER_IP: $NAMESERVER_IP"        

From 0bd59b5961dc4d3e681b94f1d9d55f6ddfee0cab Mon Sep 17 00:00:00 2001
From: htaox 
Date: Tue, 20 May 2014 16:59:15 -0400
Subject: [PATCH 64/97] Limit one faunus cluster per nameserver

---
 deploy/deploy_faunus.sh        | 18 ++++++++++++++----
 deploy/start_faunus_cluster.sh | 16 ++++++----------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/deploy/deploy_faunus.sh b/deploy/deploy_faunus.sh
index 9bf4d7e..e88c807 100644
--- a/deploy/deploy_faunus.sh
+++ b/deploy/deploy_faunus.sh
@@ -72,6 +72,14 @@ function parse_options() {
     fi
 }
 
+function check_faunus() {
+
+    containers=($(sudo docker ps | grep faunus-master | awk '{print $1}' | tr '\n' ' '))
+    NUM_FAUNUS_MASTER=$(echo ${#containers[@]})    
+    echo "There are $NUM_FAUNUS_MASTER Faunus servers running"
+
+}
+
 check_root
 
 if [[ "$#" -eq 0 ]]; then
@@ -89,11 +97,13 @@ else
     exit 0
 fi
 
-# start_nameserver $NAMESERVER_IMAGE
-# wait_for_nameserver
+check_start_nameserver $NAMESERVER_IMAGE
 
-# The nameserver should be the one used for the HBase cluster
-NAMESERVER_IP=172.17.0.2
+check_faunus
+
+if [ $NUM_FAUNUS_MASTER -gt 0 ]; then
+    exit 0
+fi
 
 start_master ${image_name}-master $image_version
 wait_for_master
diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index 0bb9430..a9be87c 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -5,14 +5,16 @@ MASTER_IP=
 NUM_REGISTERED_WORKERS=0
 BASEDIR=$(cd $(dirname $0); pwd)
 FAUNUSSERVERS="${BASEDIR}/faunusservers"
+MASTER_HOSTNAME=faunus-master
+WORKER_HOSTNAME=faunus-worker
 
 # starts the Faunus master container
 function start_master() {
     echo "starting master container"
     if [ "$DEBUG" -gt 0 ]; then
-        echo sudo docker run -d --dns $NAMESERVER_IP -h faunus-master${DOMAINNAME} $VOLUME_MAP $1:$2
+        echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2
     fi
-    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h faunus-master${DOMAINNAME} $VOLUME_MAP $1:$2)
+    MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2)
 
     if [ "$MASTER" = "" ]; then
         echo "error: could not start master container from image $1:$2"
@@ -23,7 +25,7 @@ function start_master() {
     sleep 3
     MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
     echo "MASTER_IP:                     $MASTER_IP"
-    # echo "address=\"/master/$MASTER_IP\"" >> $DNSFILE
+    # echo "address=\"/$MASTER_HOSTNAME/$MASTER_IP\"" >> $DNSFILE
 }
 
 # starts a number of Faunus workers
@@ -33,7 +35,7 @@ function start_workers() {
 
     for i in `seq 1 $NUM_WORKERS`; do
         echo "starting worker container"
-	hostname="faunus-worker${i}${DOMAINNAME}"
+	hostname="${WORKER_HOSTNAME}${i}${DOMAINNAME}"
         if [ "$DEBUG" -gt 0 ]; then
 	    echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2
         fi
@@ -88,12 +90,6 @@ function start_faunus {
     
 	chmod 400 $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa
 	
-    #echo -n "updating faunusservers file"
-    #scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=$BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa $FAUNUSSERVERS root@$MASTER_IP:/opt/faunus/conf/
-
-    #echo -n "change faunusservers file permission"
-    #ssh -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "chown hdfs.hdfs /opt/faunus/conf/faunusservers"
-
     #update the core-site.xml and faunus-site.xml and start hadoop datanodes
     while read WORKERADDRESS
     do

From 1fb31e6b3c48715c6a764c3fd670d1c1f5367395 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Tue, 20 May 2014 17:03:34 -0400
Subject: [PATCH 65/97] Uncomment writing to tmp hosts file

---
 deploy/start_faunus_cluster.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index a9be87c..59a6bed 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -25,7 +25,7 @@ function start_master() {
     sleep 3
     MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
     echo "MASTER_IP:                     $MASTER_IP"
-    # echo "address=\"/$MASTER_HOSTNAME/$MASTER_IP\"" >> $DNSFILE
+    echo "address=\"/$MASTER_HOSTNAME/$MASTER_IP\"" >> $DNSFILE
 }
 
 # starts a number of Faunus workers
@@ -49,7 +49,7 @@ function start_workers() {
 	echo "started worker container:  $WORKER"
 	sleep 3
 	WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .")
-	# echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE
+	echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE
     echo "WORKER #${i} IP: $WORKER_IP" 
     echo $WORKER_IP >> $FAUNUSSERVERS
     done

From 47b5be4145dea4e4dbdb87b61c74aa69f27389a4 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sat, 24 May 2014 19:20:49 +0100
Subject: [PATCH 66/97] Start jobtracker and tasktracker

---
 deploy/start_faunus_cluster.sh                |   4 +
 .../faunus-base/files/configure_faunus.sh     |  17 +-
 faunus-0.4.4/faunus-base/files/mapred.xml     | 947 ++++++++++++++++++
 faunus-0.4.4/faunus-master/files/default_cmd  |  14 +-
 4 files changed, 963 insertions(+), 19 deletions(-)
 create mode 100644 faunus-0.4.4/faunus-base/files/mapred.xml

diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index 59a6bed..80f4b58 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -99,6 +99,10 @@ function start_faunus {
         echo "starting datanode on ${WORKERADDRESS}"
         ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-datanode start"
     
+        echo "starting tasktracker on ${WORKERADDRESS}"
+        ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-tasktracker start"
+    
+
 		sleep 2
 	
     done < $FAUNUSSERVERS
diff --git a/faunus-0.4.4/faunus-base/files/configure_faunus.sh b/faunus-0.4.4/faunus-base/files/configure_faunus.sh
index 5d99235..b3c376f 100644
--- a/faunus-0.4.4/faunus-base/files/configure_faunus.sh
+++ b/faunus-0.4.4/faunus-base/files/configure_faunus.sh
@@ -9,24 +9,25 @@ function create_faunus_directories() {
     chown hdfs.hdfs /opt/faunus-$FAUNUS_VERSION/work
     mkdir /tmp/faunus
     chown hdfs.hdfs /tmp/faunus
-    # this one is for faunus shell logging
+    
     rm -rf /var/lib/hadoop/hdfs
     mkdir -p /var/lib/hadoop/hdfs
-    chown hdfs.hdfs /var/lib/hadoop/hdfs
-    #rm -rf /opt/faunus-$FAUNUS_VERSION/logs
-    #mkdir -p /opt/faunus-$FAUNUS_VERSION/logs
-    #chown hdfs.hdfs /opt/faunus-$FAUNUS_VERSION/logs    
+    chown hdfs.hdfs /var/lib/hadoop/hdfs    
 }
 
 function deploy_faunus_files() {
     deploy_hadoop_files
-    #cp /root/faunus_files/faunus-env.sh /opt/faunus-$FAUNUS_VERSION/conf/
-    #cp /root/faunus_files/log4j.properties /opt/faunus-$FAUNUS_VERSION/conf/
+    echo 'FAUNUS_HOME=/opt/faunus' >> /etc/environment    
 }		
 
 function configure_faunus() {
     configure_hadoop $1
-    #sed -i s/__MASTER__/master/ /opt/faunus-$FAUNUS_VERSION/conf/faunus-env.sh
+
+    # Copy mapred.xml to hadoop conf dir
+    cp /root/faunus_files/mapred.xml /etc/hadoop/mapred.xml
+
+    # Update JobTracker IP
+    sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/mapred.xml
 }
 
 function prepare_faunus() {
diff --git a/faunus-0.4.4/faunus-base/files/mapred.xml b/faunus-0.4.4/faunus-base/files/mapred.xml
new file mode 100644
index 0000000..e76c85a
--- /dev/null
+++ b/faunus-0.4.4/faunus-base/files/mapred.xml
@@ -0,0 +1,947 @@
+
+
+
+
+
+
+
+
+
+
+  hadoop.job.history.location
+  
+   If job tracker is static the history files are stored 
+  in this single well known place. If No value is set here, by default,
+  it is in the local file system at ${hadoop.log.dir}/history.
+  
+
+
+
+  hadoop.job.history.user.location
+  
+   User can specify a location to store the history files of 
+  a particular job. If nothing is specified, the logs are stored in 
+  output directory. The files are stored in "_logs/history/" in the directory.
+  User can stop logging by giving the value "none". 
+  
+
+
+
+  mapred.job.tracker.history.completed.location
+  
+   The completed job history files are stored at this single well 
+  known location. If nothing is specified, the files are stored at 
+  ${hadoop.job.history.location}/done.
+  
+
+
+
+
+
+  io.sort.factor
+  10
+  The number of streams to merge at once while sorting
+  files.  This determines the number of open file handles.
+
+
+
+  io.sort.mb
+  100
+  The total amount of buffer memory to use while sorting 
+  files, in megabytes.  By default, gives each merge stream 1MB, which
+  should minimize seeks.
+
+
+
+  io.sort.record.percent
+  0.05
+  The percentage of io.sort.mb dedicated to tracking record
+  boundaries. Let this value be r, io.sort.mb be x. The maximum number
+  of records collected before the collection thread must block is equal
+  to (r * x) / 4
+
+
+
+  io.sort.spill.percent
+  0.80
+  The soft limit in either the buffer or record collection
+  buffers. Once reached, a thread will begin to spill the contents to disk
+  in the background. Note that this does not imply any chunking of data to
+  the spill. A value less than 0.5 is not recommended.
+
+
+
+  io.map.index.skip
+  0
+  Number of index entries to skip between each entry.
+  Zero by default. Setting this to values larger than zero can
+  facilitate opening large map files using less memory.
+
+
+
+  mapred.job.tracker
+  @IP@
+  The host and port that the MapReduce job tracker runs
+  at.  If "local", then jobs are run in-process as a single map
+  and reduce task.
+  
+
+
+
+  mapred.job.tracker.http.address
+  0.0.0.0:50030
+  
+    The job tracker http server address and port the server will listen on.
+    If the port is 0 then the server will start on a free port.
+  
+
+
+
+  mapred.job.tracker.handler.count
+  10
+  
+    The number of server threads for the JobTracker. This should be roughly
+    4% of the number of tasktracker nodes.
+  
+
+
+
+  mapred.task.tracker.report.address
+  127.0.0.1:0
+  The interface and port that task tracker server listens on. 
+  Since it is only connected to by the tasks, it uses the local interface.
+  EXPERT ONLY. Should only be changed if your host does not have the loopback 
+  interface.
+
+
+
+  mapred.local.dir
+  ${hadoop.tmp.dir}/mapred/local
+  The local directory where MapReduce stores intermediate
+  data files.  May be a comma-separated list of
+  directories on different devices in order to spread disk i/o.
+  Directories that do not exist are ignored.
+  
+
+
+
+  mapred.system.dir
+  ${hadoop.tmp.dir}/mapred/system
+  The shared directory where MapReduce stores control files.
+  
+
+
+
+  mapred.temp.dir
+  ${hadoop.tmp.dir}/mapred/temp
+  A shared directory for temporary files.
+  
+
+
+
+  mapred.local.dir.minspacestart
+  0
+  If the space in mapred.local.dir drops under this, 
+  do not ask for more tasks.
+  Value in bytes.
+  
+
+
+
+  mapred.local.dir.minspacekill
+  0
+  If the space in mapred.local.dir drops under this, 
+    do not ask more tasks until all the current ones have finished and 
+    cleaned up. Also, to save the rest of the tasks we have running, 
+    kill one of them, to clean up some space. Start with the reduce tasks,
+    then go with the ones that have finished the least.
+    Value in bytes.
+  
+
+
+
+  mapred.tasktracker.expiry.interval
+  600000
+  Expert: The time-interval, in miliseconds, after which
+  a tasktracker is declared 'lost' if it doesn't send heartbeats.
+  
+
+
+
+  mapred.tasktracker.instrumentation
+  org.apache.hadoop.mapred.TaskTrackerMetricsInst
+  Expert: The instrumentation class to associate with each TaskTracker.
+  
+
+
+
+  mapred.tasktracker.memory_calculator_plugin
+  
+  
+   Name of the class whose instance will be used to query memory information
+   on the tasktracker.
+   
+   The class must be an instance of 
+   org.apache.hadoop.util.MemoryCalculatorPlugin. If the value is null, the
+   tasktracker attempts to use a class appropriate to the platform. 
+   Currently, the only platform supported is Linux.
+  
+
+
+
+  mapred.tasktracker.taskmemorymanager.monitoring-interval
+  5000
+  The interval, in milliseconds, for which the tasktracker waits
+   between two cycles of monitoring its tasks' memory usage. Used only if
+   tasks' memory management is enabled via mapred.tasktracker.tasks.maxmemory.
+   
+
+
+
+  mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill
+  5000
+  The time, in milliseconds, the tasktracker waits for sending a
+  SIGKILL to a process that has overrun memory limits, after it has been sent
+  a SIGTERM. Used only if tasks' memory management is enabled via
+  mapred.tasktracker.tasks.maxmemory.
+
+
+
+  mapred.map.tasks
+  2
+  The default number of map tasks per job.
+  Ignored when mapred.job.tracker is "local".  
+  
+
+
+
+  mapred.reduce.tasks
+  1
+  The default number of reduce tasks per job. Typically set to 99%
+  of the cluster's reduce capacity, so that if a node fails the reduces can 
+  still be executed in a single wave.
+  Ignored when mapred.job.tracker is "local".
+  
+
+
+
+  mapred.jobtracker.restart.recover
+  false
+  "true" to enable (job) recovery upon restart,
+               "false" to start afresh
+  
+
+
+
+  mapred.jobtracker.job.history.block.size
+  3145728
+  The block size of the job history file. Since the job recovery
+               uses job history, its important to dump job history to disk as 
+               soon as possible. Note that this is an expert level parameter.
+               The default value is set to 3 MB.
+  
+
+
+
+  mapred.jobtracker.taskScheduler
+  org.apache.hadoop.mapred.JobQueueTaskScheduler
+  The class responsible for scheduling the tasks.
+
+
+
+  mapred.jobtracker.taskScheduler.maxRunningTasksPerJob
+  
+  The maximum number of running tasks for a job before
+  it gets preempted. No limits if undefined.
+  
+
+
+
+  mapred.map.max.attempts
+  4
+  Expert: The maximum number of attempts per map task.
+  In other words, framework will try to execute a map task these many number
+  of times before giving up on it.
+  
+
+
+
+  mapred.reduce.max.attempts
+  4
+  Expert: The maximum number of attempts per reduce task.
+  In other words, framework will try to execute a reduce task these many number
+  of times before giving up on it.
+  
+
+
+
+  mapred.reduce.parallel.copies
+  5
+  The default number of parallel transfers run by reduce
+  during the copy(shuffle) phase.
+  
+
+
+
+  mapred.reduce.copy.backoff
+  300
+  The maximum amount of time (in seconds) a reducer spends on 
+  fetching one map output before declaring it as failed.
+  
+
+
+
+  mapred.task.timeout
+  600000
+  The number of milliseconds before a task will be
+  terminated if it neither reads an input, writes an output, nor
+  updates its status string.
+  
+
+
+
+  mapred.tasktracker.map.tasks.maximum
+  2
+  The maximum number of map tasks that will be run
+  simultaneously by a task tracker.
+  
+
+
+
+  mapred.tasktracker.reduce.tasks.maximum
+  2
+  The maximum number of reduce tasks that will be run
+  simultaneously by a task tracker.
+  
+
+
+
+  mapred.jobtracker.completeuserjobs.maximum
+  100
+  The maximum number of complete jobs per user to keep around 
+  before delegating them to the job history.
+
+
+
+  mapred.jobtracker.instrumentation
+  org.apache.hadoop.mapred.JobTrackerMetricsInst
+  Expert: The instrumentation class to associate with each JobTracker.
+  
+
+
+
+  mapred.jobtracker.plugins
+  
+  Comma-separated list of jobtracker plug-ins to be activated.
+  
+
+
+
+  mapred.child.java.opts
+  -Xmx200m
+  Java opts for the task tracker child processes.  
+  The following symbol, if present, will be interpolated: @taskid@ is replaced 
+  by current TaskID. Any other occurrences of '@' will go unchanged.
+  For example, to enable verbose gc logging to a file named for the taskid in
+  /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+        -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+  
+  The configuration variable mapred.child.ulimit can be used to control the
+  maximum virtual memory of the child processes. 
+  
+
+
+
+  mapred.child.env
+  
+  User added environment variables for the task tracker child 
+  processes. Example :
+  1) A=foo  This will set the env variable A to foo
+  2) B=$B:c This is inherit tasktracker's B env variable.  
+  
+
+
+
+  mapred.child.ulimit
+  
+  The maximum virtual memory, in KB, of a process launched by the 
+  Map-Reduce framework. This can be used to control both the Mapper/Reducer 
+  tasks and applications using Hadoop Pipes, Hadoop Streaming etc. 
+  By default it is left unspecified to let cluster admins control it via 
+  limits.conf and other such relevant mechanisms.
+  
+  Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to
+  JavaVM, else the VM might not start. 
+  
+
+
+
+  mapred.child.tmp
+  ./tmp
+   To set the value of tmp directory for map and reduce tasks.
+  If the value is an absolute path, it is directly assigned. Otherwise, it is
+  prepended with task's working directory. The java tasks are executed with
+  option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and
+  streaming are set with environment variable,
+   TMPDIR='the absolute path of the tmp dir'
+  
+
+
+
+  mapred.inmem.merge.threshold
+  1000
+  The threshold, in terms of the number of files 
+  for the in-memory merge process. When we accumulate threshold number of files
+  we initiate the in-memory merge and spill to disk. A value of 0 or less than
+  0 indicates we want to DON'T have any threshold and instead depend only on
+  the ramfs's memory consumption to trigger the merge.
+  
+
+
+
+  mapred.job.shuffle.merge.percent
+  0.66
+  The usage threshold at which an in-memory merge will be
+  initiated, expressed as a percentage of the total memory allocated to
+  storing in-memory map outputs, as defined by
+  mapred.job.shuffle.input.buffer.percent.
+  
+
+
+
+  mapred.job.shuffle.input.buffer.percent
+  0.70
+  The percentage of memory to be allocated from the maximum heap
+  size to storing map outputs during the shuffle.
+  
+
+
+
+  mapred.job.reduce.input.buffer.percent
+  0.0
+  The percentage of memory- relative to the maximum heap size- to
+  retain map outputs during the reduce. When the shuffle is concluded, any
+  remaining map outputs in memory must consume less than this threshold before
+  the reduce can begin.
+  
+
+
+
+  mapred.map.tasks.speculative.execution
+  true
+  If true, then multiple instances of some map tasks 
+               may be executed in parallel.
+
+
+
+  mapred.reduce.tasks.speculative.execution
+  true
+  If true, then multiple instances of some reduce tasks 
+               may be executed in parallel.
+
+
+
+  mapred.job.reuse.jvm.num.tasks
+  1
+  How many tasks to run per jvm. If set to -1, there is
+  no limit. 
+  
+
+
+
+  mapred.min.split.size
+  0
+  The minimum size chunk that map input should be split
+  into.  Note that some file formats may have minimum split sizes that
+  take priority over this setting.
+
+
+
+  mapred.jobtracker.maxtasks.per.job
+  -1
+  The maximum number of tasks for a single job.
+  A value of -1 indicates that there is no maximum.  
+
+
+
+  mapred.submit.replication
+  10
+  The replication level for submitted job files.  This
+  should be around the square root of the number of nodes.
+  
+
+
+
+
+  mapred.tasktracker.dns.interface
+  default
+  The name of the Network Interface from which a task
+  tracker should report its IP address.
+  
+ 
+ 
+
+  mapred.tasktracker.dns.nameserver
+  default
+  The host name or IP address of the name server (DNS)
+  which a TaskTracker should use to determine the host name used by
+  the JobTracker for communication and display purposes.
+  
+ 
+ 
+
+  tasktracker.http.threads
+  40
+  The number of worker threads that for the http server. This is
+               used for map output fetching
+  
+
+
+
+  mapred.task.tracker.http.address
+  0.0.0.0:50060
+  
+    The task tracker http server address and port.
+    If the port is 0 then the server will start on a free port.
+  
+
+
+
+  keep.failed.task.files
+  false
+  Should the files for failed tasks be kept. This should only be 
+               used on jobs that are failing, because the storage is never
+               reclaimed. It also prevents the map outputs from being erased
+               from the reduce directory as they are consumed.
+
+
+
+
+
+
+  mapred.output.compress
+  false
+  Should the job outputs be compressed?
+  
+
+
+
+  mapred.output.compression.type
+  RECORD
+  If the job outputs are to compressed as SequenceFiles, how should
+               they be compressed? Should be one of NONE, RECORD or BLOCK.
+  
+
+
+
+  mapred.output.compression.codec
+  org.apache.hadoop.io.compress.DefaultCodec
+  If the job outputs are compressed, how should they be compressed?
+  
+
+
+
+  mapred.compress.map.output
+  false
+  Should the outputs of the maps be compressed before being
+               sent across the network. Uses SequenceFile compression.
+  
+
+
+
+  mapred.map.output.compression.codec
+  org.apache.hadoop.io.compress.DefaultCodec
+  If the map outputs are compressed, how should they be 
+               compressed?
+  
+
+
+
+  map.sort.class
+  org.apache.hadoop.util.QuickSort
+  The default sort class for sorting keys.
+  
+
+
+
+  mapred.userlog.limit.kb
+  0
+  The maximum size of user-logs of each task in KB. 0 disables the cap.
+  
+
+
+
+  mapred.userlog.retain.hours
+  24
+  The maximum time, in hours, for which the user-logs are to be 
+          retained.
+  
+
+
+
+  mapred.hosts
+  
+  Names a file that contains the list of nodes that may
+  connect to the jobtracker.  If the value is empty, all hosts are
+  permitted.
+
+
+
+  mapred.hosts.exclude
+  
+  Names a file that contains the list of hosts that
+  should be excluded by the jobtracker.  If the value is empty, no
+  hosts are excluded.
+
+
+
+  mapred.max.tracker.blacklists
+  4
+  The number of blacklists for a taskTracker by various jobs
+               after which the task tracker could be blacklisted across
+               all jobs. The tracker will be given a tasks later
+               (after a day). The tracker will become a healthy
+               tracker after a restart.
+  
+ 
+
+
+  mapred.max.tracker.failures
+  4
+  The number of task-failures on a tasktracker of a given job 
+               after which new tasks of that job aren't assigned to it.
+  
+
+
+
+  jobclient.output.filter
+  FAILED
+  The filter for controlling the output of the task's userlogs sent
+               to the console of the JobClient. 
+               The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and 
+               ALL.
+  
+
+
+  
+    jobclient.completion.poll.interval
+    5000
+    The interval (in milliseconds) between which the JobClient
+    polls the JobTracker for updates about job status. You may want to set this
+    to a lower value to make tests run faster on a single node system. Adjusting
+    this value in production may lead to unwanted client-server traffic.
+    
+  
+
+  
+    jobclient.progress.monitor.poll.interval
+    1000
+    The interval (in milliseconds) between which the JobClient
+    reports status to the console and checks for job completion. You may want to set this
+    to a lower value to make tests run faster on a single node system. Adjusting
+    this value in production may lead to unwanted client-server traffic.
+    
+  
+
+  
+    mapred.job.tracker.persist.jobstatus.active
+    false
+    Indicates if persistency of job status information is
+      active or not.
+    
+  
+
+  
+  mapred.job.tracker.persist.jobstatus.hours
+  0
+  The number of hours job status information is persisted in DFS.
+    The job status information will be available after it drops of the memory
+    queue and between jobtracker restarts. With a zero value the job status
+    information is not persisted at all in DFS.
+  
+
+
+  
+    mapred.job.tracker.persist.jobstatus.dir
+    /jobtracker/jobsInfo
+    The directory where the job status information is persisted
+      in a file system to be available after it drops of the memory queue and
+      between jobtracker restarts.
+    
+  
+
+  
+    mapred.task.profile
+    false
+    To set whether the system should collect profiler
+     information for some of the tasks in this job? The information is stored
+     in the user log directory. The value is "true" if task profiling
+     is enabled.
+  
+
+  
+    mapred.task.profile.maps
+    0-2
+     To set the ranges of map tasks to profile.
+    mapred.task.profile has to be set to true for the value to be accounted.
+    
+  
+
+  
+    mapred.task.profile.reduces
+    0-2
+     To set the ranges of reduce tasks to profile.
+    mapred.task.profile has to be set to true for the value to be accounted.
+    
+  
+
+  
+    mapred.line.input.format.linespermap
+    1
+     Number of lines per split in NLineInputFormat.
+    
+  
+  
+  
+    mapred.skip.attempts.to.start.skipping
+    2
+     The number of Task attempts AFTER which skip mode 
+    will be kicked off. When skip mode is kicked off, the 
+    tasks reports the range of records which it will process 
+    next, to the TaskTracker. So that on failures, TT knows which 
+    ones are possibly the bad records. On further executions, 
+    those are skipped.
+    
+  
+  
+  
+    mapred.skip.map.auto.incr.proc.count
+    true
+     The flag which if set to true, 
+    SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS is incremented 
+    by MapRunner after invoking the map function. This value must be set to 
+    false for applications which process the records asynchronously 
+    or buffer the input records. For example streaming. 
+    In such cases applications should increment this counter on their own.
+    
+  
+  
+  
+    mapred.skip.reduce.auto.incr.proc.count
+    true
+     The flag which if set to true, 
+    SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS is incremented 
+    by framework after invoking the reduce function. This value must be set to 
+    false for applications which process the records asynchronously 
+    or buffer the input records. For example streaming. 
+    In such cases applications should increment this counter on their own.
+    
+  
+  
+  
+    mapred.skip.out.dir
+    
+     If no value is specified here, the skipped records are 
+    written to the output directory at _logs/skip.
+    User can stop writing skipped records by giving the value "none". 
+    
+  
+
+  
+    mapred.skip.map.max.skip.records
+    0
+     The number of acceptable skip records surrounding the bad 
+    record PER bad record in mapper. The number includes the bad record as well.
+    To turn the feature of detection/skipping of bad records off, set the 
+    value to 0.
+    The framework tries to narrow down the skipped range by retrying  
+    until this threshold is met OR all attempts get exhausted for this task. 
+    Set the value to Long.MAX_VALUE to indicate that framework need not try to 
+    narrow down. Whatever records(depends on application) get skipped are 
+    acceptable.
+    
+  
+  
+  
+    mapred.skip.reduce.max.skip.groups
+    0
+     The number of acceptable skip groups surrounding the bad 
+    group PER bad group in reducer. The number includes the bad group as well.
+    To turn the feature of detection/skipping of bad groups off, set the 
+    value to 0.
+    The framework tries to narrow down the skipped range by retrying  
+    until this threshold is met OR all attempts get exhausted for this task. 
+    Set the value to Long.MAX_VALUE to indicate that framework need not try to 
+    narrow down. Whatever groups(depends on application) get skipped are 
+    acceptable.
+    
+  
+  
+
+
+
+
+
+  job.end.retry.attempts
+  0
+  Indicates how many times hadoop should attempt to contact the
+               notification URL 
+
+
+
+  job.end.retry.interval
+   30000
+   Indicates time in milliseconds between notification URL retry
+                calls
+
+  
+
+
+  hadoop.rpc.socket.factory.class.JobSubmissionProtocol
+  
+   SocketFactory to use to connect to a Map/Reduce master
+    (JobTracker). If null or empty, then use hadoop.rpc.socket.class.default.
+  
+
+
+
+  mapred.task.cache.levels
+  2
+   This is the max level of the task cache. For example, if
+    the level is 2, the tasks cached are at the host level and at the rack
+    level.
+  
+
+
+
+  mapred.queue.names
+  default
+   Comma separated list of queues configured for this jobtracker.
+    Jobs are added to queues and schedulers can configure different 
+    scheduling properties for the various queues. To configure a property 
+    for a queue, the name of the queue must match the name specified in this 
+    value. Queue properties that are common to all schedulers are configured 
+    here with the naming convention, mapred.queue.$QUEUE-NAME.$PROPERTY-NAME,
+    for e.g. mapred.queue.default.submit-job-acl.
+    The number of queues configured in this parameter could depend on the
+    type of scheduler being used, as specified in 
+    mapred.jobtracker.taskScheduler. For example, the JobQueueTaskScheduler
+    supports only a single queue, which is the default configured here.
+    Before adding more queues, ensure that the scheduler you've configured
+    supports multiple queues.
+  
+
+
+
+  mapred.acls.enabled
+  false
+   Specifies whether ACLs are enabled, and should be checked
+    for various operations.
+  
+
+
+
+  mapred.queue.default.acl-submit-job
+  *
+   Comma separated list of user and group names that are allowed
+    to submit jobs to the 'default' queue. The user list and the group list
+    are separated by a blank. For e.g. alice,bob group1,group2. 
+    If set to the special value '*', it means all users are allowed to 
+    submit jobs. 
+  
+
+
+
+  mapred.queue.default.acl-administer-jobs
+  *
+   Comma separated list of user and group names that are allowed
+    to delete jobs or modify job's priority for jobs not owned by the current
+    user in the 'default' queue. The user list and the group list
+    are separated by a blank. For e.g. alice,bob group1,group2. 
+    If set to the special value '*', it means all users are allowed to do 
+    this operation.
+  
+
+
+
+  mapred.job.queue.name
+  default
+   Queue to which a job is submitted. This must match one of the
+    queues defined in mapred.queue.names for the system. Also, the ACL setup
+    for the queue must allow the current user to submit a job to the queue.
+    Before specifying a queue, ensure that the system is configured with 
+    the queue, and access is allowed for submitting jobs to the queue.
+  
+
+
+
+  mapred.tasktracker.indexcache.mb
+  10
+   The maximum memory that a task tracker allows for the 
+    index cache that is used when serving map outputs to reducers.
+  
+
+
+
+  mapred.merge.recordsBeforeProgress
+  10000
+   The number of records to process during merge before
+   sending a progress notification to the TaskTracker.
+  
+
+
+
+  mapred.reduce.slowstart.completed.maps
+  0.05
+  Fraction of the number of maps in the job which should be 
+  complete before reduces are scheduled for the job. 
+  
+
+
+
+  mapred.max.maps.per.node
+  -1
+  Per-node limit on running map tasks for the job. A value
+    of -1 signifies no limit.
+
+
+
+  mapred.max.reduces.per.node
+  -1
+  Per-node limit on running reduce tasks for the job. A value
+    of -1 signifies no limit.
+
+
+
+  mapred.running.map.limit
+  -1
+  Cluster-wide limit on running map tasks for the job. A value
+    of -1 signifies no limit.
+
+
+
+  mapred.running.reduce.limit
+  -1
+  Cluster-wide limit on running reduce tasks for the job. A value
+    of -1 signifies no limit.
+
+
+
diff --git a/faunus-0.4.4/faunus-master/files/default_cmd b/faunus-0.4.4/faunus-master/files/default_cmd
index 77e38c9..87ce8a2 100644
--- a/faunus-0.4.4/faunus-master/files/default_cmd
+++ b/faunus-0.4.4/faunus-master/files/default_cmd
@@ -14,21 +14,13 @@ echo "starting Hadoop Namenode"
 sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
 service hadoop-namenode start > /dev/null 2>&1
 
+echo "starting Hadoop Jobtracker"
+service hadoop-jobtracker start > /dev/null 2>&1 
+
 echo "starting sshd"
 /usr/sbin/sshd
 
 echo "starting Faunus Master"
 
-# Hack to update /etc/hosts
-# http://stackoverflow.com/questions/19414543/how-can-i-make-etc-hosts-writable-by-root-in-a-docker-container
-#ADD your_hosts_file /tmp/hosts
-# cp /etc/hosts /etc/hosts.bak
-# echo 172.17.0.3 >> /etc/hosts.bak
-#RUN mkdir -p -- /lib-override && cp /lib/x86_64-linux-gnu/libnss_files.so.2 /lib-override
-#RUN perl -pi -e 's:/etc/hosts:/etc/hosts.bak:g' /lib-override/libnss_files.so.2
-#ENV LD_LIBRARY_PATH /lib-override
-
 #Spin forever
 while true; do sleep 1000; done
-
-# Don't start Faunus yet.  Need to wait for the datanodes to come up.

From 0fd29ef5e33a939ba4a2e3ed7b5525cf115cb66b Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sat, 24 May 2014 19:53:27 +0100
Subject: [PATCH 67/97] mapred-site.xml should only be updated at master

---
 deploy/start_faunus_cluster.sh                |   3 +
 .../faunus-base/files/configure_faunus.sh     |   6 -
 faunus-0.4.4/faunus-base/files/mapred.xml     | 947 ------------------
 faunus-0.4.4/faunus-master/files/default_cmd  |   6 +
 .../faunus-master/files/mapred-site.xml       |  19 +
 5 files changed, 28 insertions(+), 953 deletions(-)
 delete mode 100644 faunus-0.4.4/faunus-base/files/mapred.xml
 create mode 100644 faunus-0.4.4/faunus-master/files/mapred-site.xml

diff --git a/deploy/start_faunus_cluster.sh b/deploy/start_faunus_cluster.sh
index 80f4b58..1137977 100644
--- a/deploy/start_faunus_cluster.sh
+++ b/deploy/start_faunus_cluster.sh
@@ -96,6 +96,9 @@ function start_faunus {
         echo "updating core-site.xml on ${WORKERADDRESS}"
         ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /etc/hadoop/core-site.xml root@${WORKERADDRESS}:/etc/hadoop/"
         
+        echo "updating mapred-site.xml on ${WORKERADDRESS}"
+        ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o IdentityFile=/root/.ssh/id_rsa /etc/hadoop/mapred-site.xml root@${WORKERADDRESS}:/etc/hadoop/"
+        
         echo "starting datanode on ${WORKERADDRESS}"
         ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${WORKERADDRESS} "service hadoop-datanode start"
     
diff --git a/faunus-0.4.4/faunus-base/files/configure_faunus.sh b/faunus-0.4.4/faunus-base/files/configure_faunus.sh
index b3c376f..17db764 100644
--- a/faunus-0.4.4/faunus-base/files/configure_faunus.sh
+++ b/faunus-0.4.4/faunus-base/files/configure_faunus.sh
@@ -22,12 +22,6 @@ function deploy_faunus_files() {
 
 function configure_faunus() {
     configure_hadoop $1
-
-    # Copy mapred.xml to hadoop conf dir
-    cp /root/faunus_files/mapred.xml /etc/hadoop/mapred.xml
-
-    # Update JobTracker IP
-    sed -i "s/@IP@/$1/g" $HBASE_HOME/conf/mapred.xml
 }
 
 function prepare_faunus() {
diff --git a/faunus-0.4.4/faunus-base/files/mapred.xml b/faunus-0.4.4/faunus-base/files/mapred.xml
deleted file mode 100644
index e76c85a..0000000
--- a/faunus-0.4.4/faunus-base/files/mapred.xml
+++ /dev/null
@@ -1,947 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-  hadoop.job.history.location
-  
-   If job tracker is static the history files are stored 
-  in this single well known place. If No value is set here, by default,
-  it is in the local file system at ${hadoop.log.dir}/history.
-  
-
-
-
-  hadoop.job.history.user.location
-  
-   User can specify a location to store the history files of 
-  a particular job. If nothing is specified, the logs are stored in 
-  output directory. The files are stored in "_logs/history/" in the directory.
-  User can stop logging by giving the value "none". 
-  
-
-
-
-  mapred.job.tracker.history.completed.location
-  
-   The completed job history files are stored at this single well 
-  known location. If nothing is specified, the files are stored at 
-  ${hadoop.job.history.location}/done.
-  
-
-
-
-
-
-  io.sort.factor
-  10
-  The number of streams to merge at once while sorting
-  files.  This determines the number of open file handles.
-
-
-
-  io.sort.mb
-  100
-  The total amount of buffer memory to use while sorting 
-  files, in megabytes.  By default, gives each merge stream 1MB, which
-  should minimize seeks.
-
-
-
-  io.sort.record.percent
-  0.05
-  The percentage of io.sort.mb dedicated to tracking record
-  boundaries. Let this value be r, io.sort.mb be x. The maximum number
-  of records collected before the collection thread must block is equal
-  to (r * x) / 4
-
-
-
-  io.sort.spill.percent
-  0.80
-  The soft limit in either the buffer or record collection
-  buffers. Once reached, a thread will begin to spill the contents to disk
-  in the background. Note that this does not imply any chunking of data to
-  the spill. A value less than 0.5 is not recommended.
-
-
-
-  io.map.index.skip
-  0
-  Number of index entries to skip between each entry.
-  Zero by default. Setting this to values larger than zero can
-  facilitate opening large map files using less memory.
-
-
-
-  mapred.job.tracker
-  @IP@
-  The host and port that the MapReduce job tracker runs
-  at.  If "local", then jobs are run in-process as a single map
-  and reduce task.
-  
-
-
-
-  mapred.job.tracker.http.address
-  0.0.0.0:50030
-  
-    The job tracker http server address and port the server will listen on.
-    If the port is 0 then the server will start on a free port.
-  
-
-
-
-  mapred.job.tracker.handler.count
-  10
-  
-    The number of server threads for the JobTracker. This should be roughly
-    4% of the number of tasktracker nodes.
-  
-
-
-
-  mapred.task.tracker.report.address
-  127.0.0.1:0
-  The interface and port that task tracker server listens on. 
-  Since it is only connected to by the tasks, it uses the local interface.
-  EXPERT ONLY. Should only be changed if your host does not have the loopback 
-  interface.
-
-
-
-  mapred.local.dir
-  ${hadoop.tmp.dir}/mapred/local
-  The local directory where MapReduce stores intermediate
-  data files.  May be a comma-separated list of
-  directories on different devices in order to spread disk i/o.
-  Directories that do not exist are ignored.
-  
-
-
-
-  mapred.system.dir
-  ${hadoop.tmp.dir}/mapred/system
-  The shared directory where MapReduce stores control files.
-  
-
-
-
-  mapred.temp.dir
-  ${hadoop.tmp.dir}/mapred/temp
-  A shared directory for temporary files.
-  
-
-
-
-  mapred.local.dir.minspacestart
-  0
-  If the space in mapred.local.dir drops under this, 
-  do not ask for more tasks.
-  Value in bytes.
-  
-
-
-
-  mapred.local.dir.minspacekill
-  0
-  If the space in mapred.local.dir drops under this, 
-    do not ask more tasks until all the current ones have finished and 
-    cleaned up. Also, to save the rest of the tasks we have running, 
-    kill one of them, to clean up some space. Start with the reduce tasks,
-    then go with the ones that have finished the least.
-    Value in bytes.
-  
-
-
-
-  mapred.tasktracker.expiry.interval
-  600000
-  Expert: The time-interval, in miliseconds, after which
-  a tasktracker is declared 'lost' if it doesn't send heartbeats.
-  
-
-
-
-  mapred.tasktracker.instrumentation
-  org.apache.hadoop.mapred.TaskTrackerMetricsInst
-  Expert: The instrumentation class to associate with each TaskTracker.
-  
-
-
-
-  mapred.tasktracker.memory_calculator_plugin
-  
-  
-   Name of the class whose instance will be used to query memory information
-   on the tasktracker.
-   
-   The class must be an instance of 
-   org.apache.hadoop.util.MemoryCalculatorPlugin. If the value is null, the
-   tasktracker attempts to use a class appropriate to the platform. 
-   Currently, the only platform supported is Linux.
-  
-
-
-
-  mapred.tasktracker.taskmemorymanager.monitoring-interval
-  5000
-  The interval, in milliseconds, for which the tasktracker waits
-   between two cycles of monitoring its tasks' memory usage. Used only if
-   tasks' memory management is enabled via mapred.tasktracker.tasks.maxmemory.
-   
-
-
-
-  mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill
-  5000
-  The time, in milliseconds, the tasktracker waits for sending a
-  SIGKILL to a process that has overrun memory limits, after it has been sent
-  a SIGTERM. Used only if tasks' memory management is enabled via
-  mapred.tasktracker.tasks.maxmemory.
-
-
-
-  mapred.map.tasks
-  2
-  The default number of map tasks per job.
-  Ignored when mapred.job.tracker is "local".  
-  
-
-
-
-  mapred.reduce.tasks
-  1
-  The default number of reduce tasks per job. Typically set to 99%
-  of the cluster's reduce capacity, so that if a node fails the reduces can 
-  still be executed in a single wave.
-  Ignored when mapred.job.tracker is "local".
-  
-
-
-
-  mapred.jobtracker.restart.recover
-  false
-  "true" to enable (job) recovery upon restart,
-               "false" to start afresh
-  
-
-
-
-  mapred.jobtracker.job.history.block.size
-  3145728
-  The block size of the job history file. Since the job recovery
-               uses job history, its important to dump job history to disk as 
-               soon as possible. Note that this is an expert level parameter.
-               The default value is set to 3 MB.
-  
-
-
-
-  mapred.jobtracker.taskScheduler
-  org.apache.hadoop.mapred.JobQueueTaskScheduler
-  The class responsible for scheduling the tasks.
-
-
-
-  mapred.jobtracker.taskScheduler.maxRunningTasksPerJob
-  
-  The maximum number of running tasks for a job before
-  it gets preempted. No limits if undefined.
-  
-
-
-
-  mapred.map.max.attempts
-  4
-  Expert: The maximum number of attempts per map task.
-  In other words, framework will try to execute a map task these many number
-  of times before giving up on it.
-  
-
-
-
-  mapred.reduce.max.attempts
-  4
-  Expert: The maximum number of attempts per reduce task.
-  In other words, framework will try to execute a reduce task these many number
-  of times before giving up on it.
-  
-
-
-
-  mapred.reduce.parallel.copies
-  5
-  The default number of parallel transfers run by reduce
-  during the copy(shuffle) phase.
-  
-
-
-
-  mapred.reduce.copy.backoff
-  300
-  The maximum amount of time (in seconds) a reducer spends on 
-  fetching one map output before declaring it as failed.
-  
-
-
-
-  mapred.task.timeout
-  600000
-  The number of milliseconds before a task will be
-  terminated if it neither reads an input, writes an output, nor
-  updates its status string.
-  
-
-
-
-  mapred.tasktracker.map.tasks.maximum
-  2
-  The maximum number of map tasks that will be run
-  simultaneously by a task tracker.
-  
-
-
-
-  mapred.tasktracker.reduce.tasks.maximum
-  2
-  The maximum number of reduce tasks that will be run
-  simultaneously by a task tracker.
-  
-
-
-
-  mapred.jobtracker.completeuserjobs.maximum
-  100
-  The maximum number of complete jobs per user to keep around 
-  before delegating them to the job history.
-
-
-
-  mapred.jobtracker.instrumentation
-  org.apache.hadoop.mapred.JobTrackerMetricsInst
-  Expert: The instrumentation class to associate with each JobTracker.
-  
-
-
-
-  mapred.jobtracker.plugins
-  
-  Comma-separated list of jobtracker plug-ins to be activated.
-  
-
-
-
-  mapred.child.java.opts
-  -Xmx200m
-  Java opts for the task tracker child processes.  
-  The following symbol, if present, will be interpolated: @taskid@ is replaced 
-  by current TaskID. Any other occurrences of '@' will go unchanged.
-  For example, to enable verbose gc logging to a file named for the taskid in
-  /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
-        -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
-  
-  The configuration variable mapred.child.ulimit can be used to control the
-  maximum virtual memory of the child processes. 
-  
-
-
-
-  mapred.child.env
-  
-  User added environment variables for the task tracker child 
-  processes. Example :
-  1) A=foo  This will set the env variable A to foo
-  2) B=$B:c This is inherit tasktracker's B env variable.  
-  
-
-
-
-  mapred.child.ulimit
-  
-  The maximum virtual memory, in KB, of a process launched by the 
-  Map-Reduce framework. This can be used to control both the Mapper/Reducer 
-  tasks and applications using Hadoop Pipes, Hadoop Streaming etc. 
-  By default it is left unspecified to let cluster admins control it via 
-  limits.conf and other such relevant mechanisms.
-  
-  Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to
-  JavaVM, else the VM might not start. 
-  
-
-
-
-  mapred.child.tmp
-  ./tmp
-   To set the value of tmp directory for map and reduce tasks.
-  If the value is an absolute path, it is directly assigned. Otherwise, it is
-  prepended with task's working directory. The java tasks are executed with
-  option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and
-  streaming are set with environment variable,
-   TMPDIR='the absolute path of the tmp dir'
-  
-
-
-
-  mapred.inmem.merge.threshold
-  1000
-  The threshold, in terms of the number of files 
-  for the in-memory merge process. When we accumulate threshold number of files
-  we initiate the in-memory merge and spill to disk. A value of 0 or less than
-  0 indicates we want to DON'T have any threshold and instead depend only on
-  the ramfs's memory consumption to trigger the merge.
-  
-
-
-
-  mapred.job.shuffle.merge.percent
-  0.66
-  The usage threshold at which an in-memory merge will be
-  initiated, expressed as a percentage of the total memory allocated to
-  storing in-memory map outputs, as defined by
-  mapred.job.shuffle.input.buffer.percent.
-  
-
-
-
-  mapred.job.shuffle.input.buffer.percent
-  0.70
-  The percentage of memory to be allocated from the maximum heap
-  size to storing map outputs during the shuffle.
-  
-
-
-
-  mapred.job.reduce.input.buffer.percent
-  0.0
-  The percentage of memory- relative to the maximum heap size- to
-  retain map outputs during the reduce. When the shuffle is concluded, any
-  remaining map outputs in memory must consume less than this threshold before
-  the reduce can begin.
-  
-
-
-
-  mapred.map.tasks.speculative.execution
-  true
-  If true, then multiple instances of some map tasks 
-               may be executed in parallel.
-
-
-
-  mapred.reduce.tasks.speculative.execution
-  true
-  If true, then multiple instances of some reduce tasks 
-               may be executed in parallel.
-
-
-
-  mapred.job.reuse.jvm.num.tasks
-  1
-  How many tasks to run per jvm. If set to -1, there is
-  no limit. 
-  
-
-
-
-  mapred.min.split.size
-  0
-  The minimum size chunk that map input should be split
-  into.  Note that some file formats may have minimum split sizes that
-  take priority over this setting.
-
-
-
-  mapred.jobtracker.maxtasks.per.job
-  -1
-  The maximum number of tasks for a single job.
-  A value of -1 indicates that there is no maximum.  
-
-
-
-  mapred.submit.replication
-  10
-  The replication level for submitted job files.  This
-  should be around the square root of the number of nodes.
-  
-
-
-
-
-  mapred.tasktracker.dns.interface
-  default
-  The name of the Network Interface from which a task
-  tracker should report its IP address.
-  
- 
- 
-
-  mapred.tasktracker.dns.nameserver
-  default
-  The host name or IP address of the name server (DNS)
-  which a TaskTracker should use to determine the host name used by
-  the JobTracker for communication and display purposes.
-  
- 
- 
-
-  tasktracker.http.threads
-  40
-  The number of worker threads that for the http server. This is
-               used for map output fetching
-  
-
-
-
-  mapred.task.tracker.http.address
-  0.0.0.0:50060
-  
-    The task tracker http server address and port.
-    If the port is 0 then the server will start on a free port.
-  
-
-
-
-  keep.failed.task.files
-  false
-  Should the files for failed tasks be kept. This should only be 
-               used on jobs that are failing, because the storage is never
-               reclaimed. It also prevents the map outputs from being erased
-               from the reduce directory as they are consumed.
-
-
-
-
-
-
-  mapred.output.compress
-  false
-  Should the job outputs be compressed?
-  
-
-
-
-  mapred.output.compression.type
-  RECORD
-  If the job outputs are to compressed as SequenceFiles, how should
-               they be compressed? Should be one of NONE, RECORD or BLOCK.
-  
-
-
-
-  mapred.output.compression.codec
-  org.apache.hadoop.io.compress.DefaultCodec
-  If the job outputs are compressed, how should they be compressed?
-  
-
-
-
-  mapred.compress.map.output
-  false
-  Should the outputs of the maps be compressed before being
-               sent across the network. Uses SequenceFile compression.
-  
-
-
-
-  mapred.map.output.compression.codec
-  org.apache.hadoop.io.compress.DefaultCodec
-  If the map outputs are compressed, how should they be 
-               compressed?
-  
-
-
-
-  map.sort.class
-  org.apache.hadoop.util.QuickSort
-  The default sort class for sorting keys.
-  
-
-
-
-  mapred.userlog.limit.kb
-  0
-  The maximum size of user-logs of each task in KB. 0 disables the cap.
-  
-
-
-
-  mapred.userlog.retain.hours
-  24
-  The maximum time, in hours, for which the user-logs are to be 
-          retained.
-  
-
-
-
-  mapred.hosts
-  
-  Names a file that contains the list of nodes that may
-  connect to the jobtracker.  If the value is empty, all hosts are
-  permitted.
-
-
-
-  mapred.hosts.exclude
-  
-  Names a file that contains the list of hosts that
-  should be excluded by the jobtracker.  If the value is empty, no
-  hosts are excluded.
-
-
-
-  mapred.max.tracker.blacklists
-  4
-  The number of blacklists for a taskTracker by various jobs
-               after which the task tracker could be blacklisted across
-               all jobs. The tracker will be given a tasks later
-               (after a day). The tracker will become a healthy
-               tracker after a restart.
-  
- 
-
-
-  mapred.max.tracker.failures
-  4
-  The number of task-failures on a tasktracker of a given job 
-               after which new tasks of that job aren't assigned to it.
-  
-
-
-
-  jobclient.output.filter
-  FAILED
-  The filter for controlling the output of the task's userlogs sent
-               to the console of the JobClient. 
-               The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and 
-               ALL.
-  
-
-
-  
-    jobclient.completion.poll.interval
-    5000
-    The interval (in milliseconds) between which the JobClient
-    polls the JobTracker for updates about job status. You may want to set this
-    to a lower value to make tests run faster on a single node system. Adjusting
-    this value in production may lead to unwanted client-server traffic.
-    
-  
-
-  
-    jobclient.progress.monitor.poll.interval
-    1000
-    The interval (in milliseconds) between which the JobClient
-    reports status to the console and checks for job completion. You may want to set this
-    to a lower value to make tests run faster on a single node system. Adjusting
-    this value in production may lead to unwanted client-server traffic.
-    
-  
-
-  
-    mapred.job.tracker.persist.jobstatus.active
-    false
-    Indicates if persistency of job status information is
-      active or not.
-    
-  
-
-  
-  mapred.job.tracker.persist.jobstatus.hours
-  0
-  The number of hours job status information is persisted in DFS.
-    The job status information will be available after it drops of the memory
-    queue and between jobtracker restarts. With a zero value the job status
-    information is not persisted at all in DFS.
-  
-
-
-  
-    mapred.job.tracker.persist.jobstatus.dir
-    /jobtracker/jobsInfo
-    The directory where the job status information is persisted
-      in a file system to be available after it drops of the memory queue and
-      between jobtracker restarts.
-    
-  
-
-  
-    mapred.task.profile
-    false
-    To set whether the system should collect profiler
-     information for some of the tasks in this job? The information is stored
-     in the user log directory. The value is "true" if task profiling
-     is enabled.
-  
-
-  
-    mapred.task.profile.maps
-    0-2
-     To set the ranges of map tasks to profile.
-    mapred.task.profile has to be set to true for the value to be accounted.
-    
-  
-
-  
-    mapred.task.profile.reduces
-    0-2
-     To set the ranges of reduce tasks to profile.
-    mapred.task.profile has to be set to true for the value to be accounted.
-    
-  
-
-  
-    mapred.line.input.format.linespermap
-    1
-     Number of lines per split in NLineInputFormat.
-    
-  
-  
-  
-    mapred.skip.attempts.to.start.skipping
-    2
-     The number of Task attempts AFTER which skip mode 
-    will be kicked off. When skip mode is kicked off, the 
-    tasks reports the range of records which it will process 
-    next, to the TaskTracker. So that on failures, TT knows which 
-    ones are possibly the bad records. On further executions, 
-    those are skipped.
-    
-  
-  
-  
-    mapred.skip.map.auto.incr.proc.count
-    true
-     The flag which if set to true, 
-    SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS is incremented 
-    by MapRunner after invoking the map function. This value must be set to 
-    false for applications which process the records asynchronously 
-    or buffer the input records. For example streaming. 
-    In such cases applications should increment this counter on their own.
-    
-  
-  
-  
-    mapred.skip.reduce.auto.incr.proc.count
-    true
-     The flag which if set to true, 
-    SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS is incremented 
-    by framework after invoking the reduce function. This value must be set to 
-    false for applications which process the records asynchronously 
-    or buffer the input records. For example streaming. 
-    In such cases applications should increment this counter on their own.
-    
-  
-  
-  
-    mapred.skip.out.dir
-    
-     If no value is specified here, the skipped records are 
-    written to the output directory at _logs/skip.
-    User can stop writing skipped records by giving the value "none". 
-    
-  
-
-  
-    mapred.skip.map.max.skip.records
-    0
-     The number of acceptable skip records surrounding the bad 
-    record PER bad record in mapper. The number includes the bad record as well.
-    To turn the feature of detection/skipping of bad records off, set the 
-    value to 0.
-    The framework tries to narrow down the skipped range by retrying  
-    until this threshold is met OR all attempts get exhausted for this task. 
-    Set the value to Long.MAX_VALUE to indicate that framework need not try to 
-    narrow down. Whatever records(depends on application) get skipped are 
-    acceptable.
-    
-  
-  
-  
-    mapred.skip.reduce.max.skip.groups
-    0
-     The number of acceptable skip groups surrounding the bad 
-    group PER bad group in reducer. The number includes the bad group as well.
-    To turn the feature of detection/skipping of bad groups off, set the 
-    value to 0.
-    The framework tries to narrow down the skipped range by retrying  
-    until this threshold is met OR all attempts get exhausted for this task. 
-    Set the value to Long.MAX_VALUE to indicate that framework need not try to 
-    narrow down. Whatever groups(depends on application) get skipped are 
-    acceptable.
-    
-  
-  
-
-
-
-
-
-  job.end.retry.attempts
-  0
-  Indicates how many times hadoop should attempt to contact the
-               notification URL 
-
-
-
-  job.end.retry.interval
-   30000
-   Indicates time in milliseconds between notification URL retry
-                calls
-
-  
-
-
-  hadoop.rpc.socket.factory.class.JobSubmissionProtocol
-  
-   SocketFactory to use to connect to a Map/Reduce master
-    (JobTracker). If null or empty, then use hadoop.rpc.socket.class.default.
-  
-
-
-
-  mapred.task.cache.levels
-  2
-   This is the max level of the task cache. For example, if
-    the level is 2, the tasks cached are at the host level and at the rack
-    level.
-  
-
-
-
-  mapred.queue.names
-  default
-   Comma separated list of queues configured for this jobtracker.
-    Jobs are added to queues and schedulers can configure different 
-    scheduling properties for the various queues. To configure a property 
-    for a queue, the name of the queue must match the name specified in this 
-    value. Queue properties that are common to all schedulers are configured 
-    here with the naming convention, mapred.queue.$QUEUE-NAME.$PROPERTY-NAME,
-    for e.g. mapred.queue.default.submit-job-acl.
-    The number of queues configured in this parameter could depend on the
-    type of scheduler being used, as specified in 
-    mapred.jobtracker.taskScheduler. For example, the JobQueueTaskScheduler
-    supports only a single queue, which is the default configured here.
-    Before adding more queues, ensure that the scheduler you've configured
-    supports multiple queues.
-  
-
-
-
-  mapred.acls.enabled
-  false
-   Specifies whether ACLs are enabled, and should be checked
-    for various operations.
-  
-
-
-
-  mapred.queue.default.acl-submit-job
-  *
-   Comma separated list of user and group names that are allowed
-    to submit jobs to the 'default' queue. The user list and the group list
-    are separated by a blank. For e.g. alice,bob group1,group2. 
-    If set to the special value '*', it means all users are allowed to 
-    submit jobs. 
-  
-
-
-
-  mapred.queue.default.acl-administer-jobs
-  *
-   Comma separated list of user and group names that are allowed
-    to delete jobs or modify job's priority for jobs not owned by the current
-    user in the 'default' queue. The user list and the group list
-    are separated by a blank. For e.g. alice,bob group1,group2. 
-    If set to the special value '*', it means all users are allowed to do 
-    this operation.
-  
-
-
-
-  mapred.job.queue.name
-  default
-   Queue to which a job is submitted. This must match one of the
-    queues defined in mapred.queue.names for the system. Also, the ACL setup
-    for the queue must allow the current user to submit a job to the queue.
-    Before specifying a queue, ensure that the system is configured with 
-    the queue, and access is allowed for submitting jobs to the queue.
-  
-
-
-
-  mapred.tasktracker.indexcache.mb
-  10
-   The maximum memory that a task tracker allows for the 
-    index cache that is used when serving map outputs to reducers.
-  
-
-
-
-  mapred.merge.recordsBeforeProgress
-  10000
-   The number of records to process during merge before
-   sending a progress notification to the TaskTracker.
-  
-
-
-
-  mapred.reduce.slowstart.completed.maps
-  0.05
-  Fraction of the number of maps in the job which should be 
-  complete before reduces are scheduled for the job. 
-  
-
-
-
-  mapred.max.maps.per.node
-  -1
-  Per-node limit on running map tasks for the job. A value
-    of -1 signifies no limit.
-
-
-
-  mapred.max.reduces.per.node
-  -1
-  Per-node limit on running reduce tasks for the job. A value
-    of -1 signifies no limit.
-
-
-
-  mapred.running.map.limit
-  -1
-  Cluster-wide limit on running map tasks for the job. A value
-    of -1 signifies no limit.
-
-
-
-  mapred.running.reduce.limit
-  -1
-  Cluster-wide limit on running reduce tasks for the job. A value
-    of -1 signifies no limit.
-
-
-
diff --git a/faunus-0.4.4/faunus-master/files/default_cmd b/faunus-0.4.4/faunus-master/files/default_cmd
index 87ce8a2..56e55a8 100644
--- a/faunus-0.4.4/faunus-master/files/default_cmd
+++ b/faunus-0.4.4/faunus-master/files/default_cmd
@@ -14,6 +14,12 @@ echo "starting Hadoop Namenode"
 sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
 service hadoop-namenode start > /dev/null 2>&1
 
+# Copy mapred.xml to hadoop conf dir
+cp /root/faunus_master_files/mapred-site.xml /etc/hadoop/mapred-site.xml
+
+# Update JobTracker IP
+sed -i "s/@IP@/${1}:50030/g" $HBASE_HOME/conf/mapred-site.xml
+
 echo "starting Hadoop Jobtracker"
 service hadoop-jobtracker start > /dev/null 2>&1 
 
diff --git a/faunus-0.4.4/faunus-master/files/mapred-site.xml b/faunus-0.4.4/faunus-master/files/mapred-site.xml
new file mode 100644
index 0000000..ac5de7d
--- /dev/null
+++ b/faunus-0.4.4/faunus-master/files/mapred-site.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+  mapred.job.tracker
+  @IP@
+  The host and port that the MapReduce job tracker runs
+  at.  If "local", then jobs are run in-process as a single map
+  and reduce task.
+  
+
+
+

From bc082652fe189a7ae059b47a1116486cf3b2301a Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sat, 24 May 2014 20:25:49 +0100
Subject: [PATCH 68/97] Change permission for mapred.system.dir

---
 faunus-0.4.4/faunus-master/files/default_cmd | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/faunus-0.4.4/faunus-master/files/default_cmd b/faunus-0.4.4/faunus-master/files/default_cmd
index 56e55a8..d8f1809 100644
--- a/faunus-0.4.4/faunus-master/files/default_cmd
+++ b/faunus-0.4.4/faunus-master/files/default_cmd
@@ -18,7 +18,11 @@ service hadoop-namenode start > /dev/null 2>&1
 cp /root/faunus_master_files/mapred-site.xml /etc/hadoop/mapred-site.xml
 
 # Update JobTracker IP
-sed -i "s/@IP@/${1}:50030/g" $HBASE_HOME/conf/mapred-site.xml
+sed -i "s/@IP@/${IP}:9001/g" /etc/hadoop/mapred-site.xml
+
+# change user permissions for default mapred.system.dir
+sudo -u hdfs hadoop fs -mkdir /tmp
+sudo -u hdfs hadoop fs -chmod 777 /tmp
 
 echo "starting Hadoop Jobtracker"
 service hadoop-jobtracker start > /dev/null 2>&1 

From fac7238159dfcf213780751990dec0f28a88ac2c Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sat, 24 May 2014 20:56:23 +0100
Subject: [PATCH 69/97] Increase map reduce tasks allowed

---
 .../faunus-master/files/mapred-site.xml        | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/faunus-0.4.4/faunus-master/files/mapred-site.xml b/faunus-0.4.4/faunus-master/files/mapred-site.xml
index ac5de7d..5ab8bc1 100644
--- a/faunus-0.4.4/faunus-master/files/mapred-site.xml
+++ b/faunus-0.4.4/faunus-master/files/mapred-site.xml
@@ -16,4 +16,22 @@
   
 
 
+
+  mapred.map.tasks
+  8
+  The default number of map tasks per job.
+  Ignored when mapred.job.tracker is "local".  
+  
+
+
+
+  mapred.reduce.tasks
+  4
+  The default number of reduce tasks per job. Typically set to 99%
+  of the cluster's reduce capacity, so that if a node fails the reduces can 
+  still be executed in a single wave.
+  Ignored when mapred.job.tracker is "local".
+  
+
+
 

From c4931ad6c9c582760ff3147284e4e4e26c773218 Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 25 May 2014 20:53:01 +0100
Subject: [PATCH 70/97] Increase JVM heap size

---
 faunus-0.4.4/faunus-master/files/default_cmd   | 11 +++++++----
 .../faunus-master/files/mapred-site.xml        | 18 ------------------
 faunus-0.4.4/faunus-worker/files/default_cmd   |  3 +++
 3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/faunus-0.4.4/faunus-master/files/default_cmd b/faunus-0.4.4/faunus-master/files/default_cmd
index d8f1809..bbe4440 100644
--- a/faunus-0.4.4/faunus-master/files/default_cmd
+++ b/faunus-0.4.4/faunus-master/files/default_cmd
@@ -10,16 +10,19 @@ echo "MASTER_IP=$IP"
 echo "preparing Faunus"
 prepare_faunus $IP
 
-echo "starting Hadoop Namenode"
-sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
-service hadoop-namenode start > /dev/null 2>&1
-
 # Copy mapred.xml to hadoop conf dir
 cp /root/faunus_master_files/mapred-site.xml /etc/hadoop/mapred-site.xml
 
 # Update JobTracker IP
 sed -i "s/@IP@/${IP}:9001/g" /etc/hadoop/mapred-site.xml
 
+# Increase JVM HEAP SIZE
+sed -i 's/-Xmx128m/-Xmx1024m/g' /etc/hadoop/hadoop-env.sh 
+
+echo "starting Hadoop Namenode"
+sudo -u hdfs hadoop namenode -format > /dev/null 2>&1
+service hadoop-namenode start > /dev/null 2>&1
+
 # change user permissions for default mapred.system.dir
 sudo -u hdfs hadoop fs -mkdir /tmp
 sudo -u hdfs hadoop fs -chmod 777 /tmp
diff --git a/faunus-0.4.4/faunus-master/files/mapred-site.xml b/faunus-0.4.4/faunus-master/files/mapred-site.xml
index 5ab8bc1..ac5de7d 100644
--- a/faunus-0.4.4/faunus-master/files/mapred-site.xml
+++ b/faunus-0.4.4/faunus-master/files/mapred-site.xml
@@ -16,22 +16,4 @@
   
 
 
-
-  mapred.map.tasks
-  8
-  The default number of map tasks per job.
-  Ignored when mapred.job.tracker is "local".  
-  
-
-
-
-  mapred.reduce.tasks
-  4
-  The default number of reduce tasks per job. Typically set to 99%
-  of the cluster's reduce capacity, so that if a node fails the reduces can 
-  still be executed in a single wave.
-  Ignored when mapred.job.tracker is "local".
-  
-
-
 
diff --git a/faunus-0.4.4/faunus-worker/files/default_cmd b/faunus-0.4.4/faunus-worker/files/default_cmd
index 4a17f3f..1277fc2 100644
--- a/faunus-0.4.4/faunus-worker/files/default_cmd
+++ b/faunus-0.4.4/faunus-worker/files/default_cmd
@@ -10,6 +10,9 @@ echo "WORKER_IP=$IP"
 echo "preparing HBase"
 prepare_faunus $IP
 
+# Increase JVM HEAP SIZE
+sed -i 's/-Xmx128m/-Xmx2048m/g' /etc/hadoop/hadoop-env.sh
+
 #echo "starting Hadoop Datanode"
 #service hadoop-datanode start
 

From d186067f8d44ff72ac16caa23e5d9db9bf94996d Mon Sep 17 00:00:00 2001
From: htaox 
Date: Sun, 1 Jun 2014 12:27:04 +0100
Subject: [PATCH 71/97] Add readme for faunus

---
 README-faunus.md                             | 50 ++++++++++++++++++++
 faunus-0.4.4/faunus-worker/files/default_cmd |  2 +
 2 files changed, 52 insertions(+)
 create mode 100644 README-faunus.md

diff --git a/README-faunus.md b/README-faunus.md
new file mode 100644
index 0000000..b8dac5b
--- /dev/null
+++ b/README-faunus.md
@@ -0,0 +1,50 @@
+#### Deploy the Faunus distributed cluster
+
+
+$ NUMBER_OF_DATANODES=3
+$ sudo deploy/deploy_faunus.sh -i htaox/faunus:0.4.4 -w $NUMBER_OF_DATANODES
+
+ +This will (typically) result in the following setup: + +
+NAMESERVER                 10.1.0.3
+FAUNUS MASTER       10.1.0.4
+FAUNUS DATANODE     10.1.0.5
+FAUNUS DATANODE     10.1.0.6
+FAUNUS DATANODE     10.1.0.7
+
+ +#### Kill the Faunus cluster + +
+$ sudo deploy/kill_all.sh faunus
+$ sudo deploy/kill_all.sh nameserver
+
+ +#### After Faunus cluster is killed, cleanup +
+$ sudo docker rm `sudo docker ps -a -q`
+$ sudo docker images | grep "" | awk '{print $3}' | xargs sudo docker rmi
+
+ +#### Build locally + +__Download the scripts__ +
+$ git clone -b add-faunus https://github.com/htaox/docker-scripts.git
+
+ +__Change file permissions__ +
    
+$ cd ~/docker-scripts
+$ chmod a+x build/build_all_faunus.sh
+$ chmod a+x faunus-0.4.4/build
+$ chmod a+x deploy/deploy_faunus.sh
+
+ +__Build__ +
    
+$ sudo build/build_all_faunus.sh
+
+ diff --git a/faunus-0.4.4/faunus-worker/files/default_cmd b/faunus-0.4.4/faunus-worker/files/default_cmd index 1277fc2..ee03e17 100644 --- a/faunus-0.4.4/faunus-worker/files/default_cmd +++ b/faunus-0.4.4/faunus-worker/files/default_cmd @@ -13,6 +13,8 @@ prepare_faunus $IP # Increase JVM HEAP SIZE sed -i 's/-Xmx128m/-Xmx2048m/g' /etc/hadoop/hadoop-env.sh +echo 'export HADOOP_HEAPSIZE=2048' >> /etc/hadoop/hadoop-env.sh + #echo "starting Hadoop Datanode" #service hadoop-datanode start From bb37606e1ef388fb0a2128494fdbc537ea3a96fb Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 1 Jun 2014 12:29:04 +0100 Subject: [PATCH 72/97] Reference readme for faunus --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 2b7307b..17a031a 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,9 @@ For building & running HBase on top of a Hadoop cluster, please check out [READM #### Dockerfiles for distributed Elasticsearch setup For building & running an Elasticsearch cluster, please check out [README-elasticsearch.md](https://github.com/htaox/docker-scripts/blob/add-hbase/README-elasticsearch.md) +#### Dockerfiles for distributed Faunus setup +For building & running an Faunus cluster, please check out [README-faunus.md](https://github.com/htaox/docker-scripts/blob/add-hbase/README-faunus.md) + # Dockerfiles for Spark and Shark ## Contents From e5543651f8ea26c42e42107b4ba9de92b732b270 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 8 Jun 2014 22:12:19 +0100 Subject: [PATCH 73/97] Wait 2 sec for zookeeper to start --- deploy/start_hbase_cluster.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deploy/start_hbase_cluster.sh b/deploy/start_hbase_cluster.sh index a5e4f8d..afa9f77 100755 --- a/deploy/start_hbase_cluster.sh +++ b/deploy/start_hbase_cluster.sh @@ -119,6 +119,8 @@ function start_hbase { echo "starting zookeeper on ${MASTER_IP}" ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o 
UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "/usr/local/zookeeper/bin/zkServer.sh start" + sleep 2 + echo "starting hbase master on ${MASTER_IP}" ssh -n -i $BASEDIR/apache-hadoop-hdfs-precise/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP} "sudo -u hdfs /opt/hbase/bin/hbase-daemon.sh --config /opt/hbase/conf start master" From e7da18e530cf2a1b45aed0159281c5259cffe979 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 9 Sep 2014 09:31:26 -0400 Subject: [PATCH 74/97] Base files should import htaox/apache-hadoop-hdfs-precise:1.2.1 --- apache-hadoop-hdfs-precise/Dockerfile | 6 ++- faunus-0.4.4/faunus-base/Dockerfile | 2 +- giraph-1.1.0/giraph-base/Dockerfile | 23 +++++++++ giraph-1.1.0/giraph-base/build | 4 ++ .../giraph-base/files/configure_giraph.sh | 33 +++++++++++++ .../giraph-base/files/log4j.properties | 8 ++++ giraph-1.1.0/giraph-master/Dockerfile | 7 +++ giraph-1.1.0/giraph-master/build | 4 ++ giraph-1.1.0/giraph-master/files/default_cmd | 24 ++++++++++ giraph-1.1.0/giraph-worker/Dockerfile | 9 ++++ giraph-1.1.0/giraph-worker/build | 4 ++ giraph-1.1.0/giraph-worker/files/default_cmd | 20 ++++++++ gremlin-server-3.0.0/Dockerfile | 20 ++++++++ gremlin-server-3.0.0/README.md | 48 +++++++++++++++++++ gremlin-server-3.0.0/files/default_cmd | 19 ++++++++ hbase-0.94.18/hbase-base/Dockerfile | 2 +- oracle-java8-hadoop/Dockerfile | 23 +++++++++ oracle-java8-hadoop/build | 5 ++ oracle-java8/Dockerfile | 17 +++++++ oracle-java8/build | 5 ++ 20 files changed, 279 insertions(+), 4 deletions(-) create mode 100755 giraph-1.1.0/giraph-base/Dockerfile create mode 100755 giraph-1.1.0/giraph-base/build create mode 100755 giraph-1.1.0/giraph-base/files/configure_giraph.sh create mode 100755 giraph-1.1.0/giraph-base/files/log4j.properties create mode 100755 giraph-1.1.0/giraph-master/Dockerfile create mode 100755 giraph-1.1.0/giraph-master/build create mode 100755 giraph-1.1.0/giraph-master/files/default_cmd 
create mode 100755 giraph-1.1.0/giraph-worker/Dockerfile create mode 100755 giraph-1.1.0/giraph-worker/build create mode 100755 giraph-1.1.0/giraph-worker/files/default_cmd create mode 100755 gremlin-server-3.0.0/Dockerfile create mode 100755 gremlin-server-3.0.0/README.md create mode 100755 gremlin-server-3.0.0/files/default_cmd create mode 100755 oracle-java8-hadoop/Dockerfile create mode 100755 oracle-java8-hadoop/build create mode 100755 oracle-java8/Dockerfile create mode 100755 oracle-java8/build diff --git a/apache-hadoop-hdfs-precise/Dockerfile b/apache-hadoop-hdfs-precise/Dockerfile index 3fa1d49..56de4ce 100644 --- a/apache-hadoop-hdfs-precise/Dockerfile +++ b/apache-hadoop-hdfs-precise/Dockerfile @@ -1,7 +1,9 @@ # Base Ubuntu Precise 12.04 LTS image -# +# Forked/Credit from amplab +# https://github.com/amplab/docker-scripts/tree/master/apache-hadoop-hdfs-precise +# Added Openjdk dependency for Linux 3.8.0-38-generic x86_64 FROM ubuntu:precise -MAINTAINER amplab amp-docker@eecs.berkeley.edu +MAINTAINER htaox htaox@hotmail.com # Setup a volume for data VOLUME ["/data"] diff --git a/faunus-0.4.4/faunus-base/Dockerfile b/faunus-0.4.4/faunus-base/Dockerfile index e347169..0c5af52 100644 --- a/faunus-0.4.4/faunus-base/Dockerfile +++ b/faunus-0.4.4/faunus-base/Dockerfile @@ -1,6 +1,6 @@ # FAUNUS 0.4.4 # -FROM apache-hadoop-hdfs-precise:1.2.1 +FROM htaox/apache-hadoop-hdfs-precise:1.2.1 MAINTAINER htaox htaox@hotmail.com # Download and Install HBase diff --git a/giraph-1.1.0/giraph-base/Dockerfile b/giraph-1.1.0/giraph-base/Dockerfile new file mode 100755 index 0000000..1e59a62 --- /dev/null +++ b/giraph-1.1.0/giraph-base/Dockerfile @@ -0,0 +1,23 @@ +# GIRAPH 1.1.0 +# +FROM htaox/oracle-java8-hadoop:1.2.1 +MAINTAINER htaox htaox@hotmail.com + +RUN cd /opt + +RUN git clone https://github.com/apache/giraph.git + +ENV GIRAPH_HOME /opt/giraph +ENV GIRAPH_VERSION 1.1.0 + +#Use jdk 8 to compile +ENV JAVA_HOME /usr/lib/jvm/java-8-oracle + 
+RUN cd giraph && mvn clean package -DskipTests + +# core JAR is in $GIRAPH_HOME/giraph-core/target/giraph-1.1.0-SNAPSHOT-for-hadoop-1.2.1-jar-with-dependencies.jar + +ADD files /root/giraph_files + + + diff --git a/giraph-1.1.0/giraph-base/build b/giraph-1.1.0/giraph-base/build new file mode 100755 index 0000000..f89dcfd --- /dev/null +++ b/giraph-1.1.0/giraph-base/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}giraph-base:1.1.0 . diff --git a/giraph-1.1.0/giraph-base/files/configure_giraph.sh b/giraph-1.1.0/giraph-base/files/configure_giraph.sh new file mode 100755 index 0000000..38da873 --- /dev/null +++ b/giraph-1.1.0/giraph-base/files/configure_giraph.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +source /root/hadoop_files/configure_hadoop.sh + +function create_giraph_directories() { + create_hadoop_directories + rm -rf /opt/giraph-$GIRAPH_VERSION/work + mkdir -p /opt/giraph-$GIRAPH_VERSION/work + chown hdfs.hdfs /opt/giraph-$GIRAPH_VERSION/work + mkdir /tmp/giraph + chown hdfs.hdfs /tmp/giraph + # this one is for GIRAPH shell logging + rm -rf /var/lib/hadoop/hdfs + mkdir -p /var/lib/hadoop/hdfs + chown hdfs.hdfs /var/lib/hadoop/hdfs + rm -rf /opt/giraph-$GIRAPH_VERSION/logs + mkdir -p /opt/giraph-$GIRAPH_VERSION/logs + chown hdfs.hdfs /opt/giraph-$GIRAPH_VERSION/logs +} + +function deploy_giraph_files() { + deploy_hadoop_files +} + +function configure_giraph() { + configure_hadoop $1 +} + +function prepare_giraph() { + create_giraph_directories + deploy_giraph_files + configure_giraph $1 +} diff --git a/giraph-1.1.0/giraph-base/files/log4j.properties b/giraph-1.1.0/giraph-base/files/log4j.properties new file mode 100755 index 0000000..d72dbad --- /dev/null +++ b/giraph-1.1.0/giraph-base/files/log4j.properties @@ -0,0 +1,8 @@ +# Set everything to be logged to the console 
+log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.eclipse.jetty=WARN diff --git a/giraph-1.1.0/giraph-master/Dockerfile b/giraph-1.1.0/giraph-master/Dockerfile new file mode 100755 index 0000000..68e1129 --- /dev/null +++ b/giraph-1.1.0/giraph-master/Dockerfile @@ -0,0 +1,7 @@ +# Giraph +FROM htaox/giraph-base:1.1.0 +MAINTAINER htaox htaox@hotmail.com + +ADD files /root/giraph_master_files +RUN chmod 700 /root/giraph_master_files/default_cmd +CMD ["/root/giraph_master_files/default_cmd"] diff --git a/giraph-1.1.0/giraph-master/build b/giraph-1.1.0/giraph-master/build new file mode 100755 index 0000000..42242e2 --- /dev/null +++ b/giraph-1.1.0/giraph-master/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}giraph-master:1.1.0 . 
diff --git a/giraph-1.1.0/giraph-master/files/default_cmd b/giraph-1.1.0/giraph-master/files/default_cmd new file mode 100755 index 0000000..160bf37 --- /dev/null +++ b/giraph-1.1.0/giraph-master/files/default_cmd @@ -0,0 +1,24 @@ +#!/bin/bash + +env + +source /root/giraph_files/configure_giraph.sh + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "MASTER_IP=$IP" + +echo "preparing Giraph" +prepare_giraph $IP + +echo "starting Hadoop Namenode" +sudo -u hdfs hadoop namenode -format > /dev/null 2>&1 +service hadoop-namenode start > /dev/null 2>&1 + +echo "starting sshd" +/usr/sbin/sshd + +while [ 1 ]; +do + tail -f "${GIRAPH_HOME}/logs"/*.out + sleep 1 +done diff --git a/giraph-1.1.0/giraph-worker/Dockerfile b/giraph-1.1.0/giraph-worker/Dockerfile new file mode 100755 index 0000000..b407b62 --- /dev/null +++ b/giraph-1.1.0/giraph-worker/Dockerfile @@ -0,0 +1,9 @@ +#Giraph +FROM htaox/giraph-base:1.1.0 +MAINTAINER htaox htaox@hotmail.com + +ADD files /root/giraph_worker_files +RUN chmod 700 /root/giraph_worker_files/default_cmd +# Add the entrypoint script for the master +CMD ["-h"] +ENTRYPOINT ["/root/giraph_worker_files/default_cmd"] diff --git a/giraph-1.1.0/giraph-worker/build b/giraph-1.1.0/giraph-worker/build new file mode 100755 index 0000000..b91aacd --- /dev/null +++ b/giraph-1.1.0/giraph-worker/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}giraph-worker:1.1.0 . 
diff --git a/giraph-1.1.0/giraph-worker/files/default_cmd b/giraph-1.1.0/giraph-worker/files/default_cmd new file mode 100755 index 0000000..e60fde5 --- /dev/null +++ b/giraph-1.1.0/giraph-worker/files/default_cmd @@ -0,0 +1,20 @@ +#!/bin/bash + +source /root/giraph_files/configure_giraph.sh + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "WORKER_IP=$IP" + +echo "preparing Giraph" +prepare_giraph $IP + +#Don't start Hadoop yet, need to change core-site.xml from master first + +echo "starting sshd" +/usr/sbin/sshd + +while [ 1 ]; +do + tail -f "${HBASE_HOME}/logs"/*.out + sleep 1 +done diff --git a/gremlin-server-3.0.0/Dockerfile b/gremlin-server-3.0.0/Dockerfile new file mode 100755 index 0000000..8eb2f5d --- /dev/null +++ b/gremlin-server-3.0.0/Dockerfile @@ -0,0 +1,20 @@ +# Base Ubuntu Precise 12.04 LTS image w/ Java 8 & Maven installed +# +FROM htaox/oracle-java8:latest +MAINTAINER htaox htaox@hotmail.com + +RUN git clone https://github.com/tinkerpop/tinkerpop3.git + +RUN cd tinkerpop3 && mvn clean package -DskipTests + +RUN cp tinkerpop3/gremlin-server/target/gremlin-server-3.0.0-SNAPSHOT-distribution.zip /opt + +RUN cd /opt && unzip gremlin-server-3.0.0-SNAPSHOT-distribution.zip + +RUN ln -s /opt/gremlin-server-3.0.0-SNAPSHOT /opt/gremlin-server && cd /opt/gremlin-server + +EXPOSE 8182 + +ADD files /root/gremlin_files +RUN chmod 700 /root/gremlin_files/default_cmd +CMD ["/root/gremlin_files/default_cmd"] diff --git a/gremlin-server-3.0.0/README.md b/gremlin-server-3.0.0/README.md new file mode 100755 index 0000000..c7b93d0 --- /dev/null +++ b/gremlin-server-3.0.0/README.md @@ -0,0 +1,48 @@ +The easiest way to test Tinkerpop3 Gremlin Server. +=================================================== + +Github REPO can be found [here](https://github.com/htaox/NEAT/tree/master/docker-scripts/gremlin-server-3.0.0). 
+ +5 minute setup +* * * +Setup variables +============ +```hostname=gremlin-server +image_name=htaox/gremlin-server +image_version=3.0.0 +``` +Pull the image +=========== +```sudo docker pull $image_name:$image_version +``` +Test interactively +===================== +```sudo docker run --rm -i -t -h $hostname $image_name:$image_version /bin/bash +``` +Launch container as daemon +============================== +```sudo docker run -d -h $hostname $image_name:$image_version +``` +Get ip address of container +=============================== +```containers=($(sudo docker ps | grep gremlin-server | awk '{print $1}' | tr '\n' ' ')) +for i in "${containers[@]}"; do IP=$(sudo docker logs "$i" | grep ^IP=); done +``` +Assign the IP variable +========================== +```eval $IP +``` +Test websocket handshake +========================= +```curl -i -N -vv -H "Connection: Upgrade" -H "Upgrade: websocket" -H "Host: localhost" -H "Origin: http://localhost" -k "http://$IP:8182" +``` +Test SSH (ie change properties of gremlin-server.yaml, BTW, root password is "gremlin") +============ +```ssh root@$IP +example: scp ~/gremlin-server.yaml root@$IP:/opt/gremlin-server/config +``` +Kill containers +=================== +```containers=($(sudo docker ps | grep gremlin-server | awk '{print $1}' | tr '\n' ' ')) +for i in "${containers[@]}"; do sudo docker kill "$i"; done +``` \ No newline at end of file diff --git a/gremlin-server-3.0.0/files/default_cmd b/gremlin-server-3.0.0/files/default_cmd new file mode 100755 index 0000000..c1892a7 --- /dev/null +++ b/gremlin-server-3.0.0/files/default_cmd @@ -0,0 +1,19 @@ +#!/bin/bash + +env + +IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { print $1 }') +echo "IP=$IP" + +sed -i "s|^host:.*|host: $IP|" /opt/gremlin-server/config/gremlin-server.yaml + +mkdir /var/run/sshd + +echo "changing root password" +echo 'root:gremlin' |chpasswd + +echo "starting sshd" +/usr/sbin/sshd + +echo "starting gremlin-server" 
+/opt/gremlin-server/bin/gremlin-server.sh /opt/gremlin-server/config/gremlin-server.yaml diff --git a/hbase-0.94.18/hbase-base/Dockerfile b/hbase-0.94.18/hbase-base/Dockerfile index f90c8fa..62a1697 100644 --- a/hbase-0.94.18/hbase-base/Dockerfile +++ b/hbase-0.94.18/hbase-base/Dockerfile @@ -1,6 +1,6 @@ # HBASE 0.94.18 # -FROM apache-hadoop-hdfs-precise:1.2.1 +FROM htaox/apache-hadoop-hdfs-precise:1.2.1 MAINTAINER htaox htaox@hotmail.com # Intall cURL diff --git a/oracle-java8-hadoop/Dockerfile b/oracle-java8-hadoop/Dockerfile new file mode 100755 index 0000000..c37cf7d --- /dev/null +++ b/oracle-java8-hadoop/Dockerfile @@ -0,0 +1,23 @@ +# Base Ubuntu Precise 12.04 LTS w/ Hadoop 1.2.1 image +# +FROM htaox/apache-hadoop-hdfs-precise:1.2.1 +MAINTAINER htaox htaox@hotmail.com + +# install Oracle Java 8 +RUN echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list +RUN echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys EEA14886 +RUN apt-get update + +# auto accept oracle jdk license +RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections +RUN apt-get install -y oracle-java8-installer ca-certificates + +# Update source.list +RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-security main universe' >> /etc/apt/sources.list +RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise-updates main universe' >> /etc/apt/sources.list +RUN cat /etc/apt/sources.list +RUN apt-get update + +# Install tools +run apt-get install -y git maven curl \ No newline at end of file diff --git a/oracle-java8-hadoop/build b/oracle-java8-hadoop/build new file mode 100755 index 0000000..7431866 --- /dev/null +++ b/oracle-java8-hadoop/build @@ -0,0 +1,5 @@ +IMAGE_PREFIX="htaox/" +rm -f files/files.hash +for i in `find . 
-type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}oracle-java8-hadoop:1.2.1 . \ No newline at end of file diff --git a/oracle-java8/Dockerfile b/oracle-java8/Dockerfile new file mode 100755 index 0000000..e7869e8 --- /dev/null +++ b/oracle-java8/Dockerfile @@ -0,0 +1,17 @@ +# Base Ubuntu Precise 12.04 LTS image +# +FROM ubuntu:precise +MAINTAINER htaox htaox@hotmail.com + +RUN echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list +RUN echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list +RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys EEA14886 +RUN apt-get update + +# auto accept oracle jdk license +RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections +RUN apt-get install -y oracle-java8-installer ca-certificates + +# Install tools +run apt-get install -y git maven openssh-server + diff --git a/oracle-java8/build b/oracle-java8/build new file mode 100755 index 0000000..9815ca3 --- /dev/null +++ b/oracle-java8/build @@ -0,0 +1,5 @@ +IMAGE_PREFIX="htaox/" +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}oracle-java8:latest . 
\ No newline at end of file From 3a1548204f577cb12d27918f5d8aeeb2c6a11333 Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 10 Sep 2014 14:48:13 -0400 Subject: [PATCH 75/97] Fix RHEL 6 as host sshd issue --- apache-hadoop-hdfs-precise/files/configure_hadoop.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apache-hadoop-hdfs-precise/files/configure_hadoop.sh b/apache-hadoop-hdfs-precise/files/configure_hadoop.sh index 6e5ef78..8356056 100755 --- a/apache-hadoop-hdfs-precise/files/configure_hadoop.sh +++ b/apache-hadoop-hdfs-precise/files/configure_hadoop.sh @@ -2,6 +2,13 @@ hadoop_files=( "/root/hadoop_files/core-site.xml" "/root/hadoop_files/hdfs-site.xml" ) + +# Add Fix for Selinux sshd issue if host is RHEL 6 +# re: https://groups.google.com/forum/#!topic/docker-user/73AiwlZEgY4 +function update_selinux() { + wget http://mirrors.kernel.org/ubuntu/pool/main/libs/libselinux/libselinux1_2.1.13-2_amd64.deb && dpkg --install libselinux1_2.1.13-2_amd64.deb +} + function create_hadoop_directories() { rm -rf /root/.ssh mkdir /root/.ssh @@ -27,6 +34,7 @@ function configure_hadoop() { } function prepare_hadoop() { + update_selinux create_hadoop_directories deploy_hadoop_files configure_hadoop $1 From 439ad7e47c19baf526062f33438a4e48efbd2b09 Mon Sep 17 00:00:00 2001 From: htaox Date: Wed, 1 Oct 2014 12:31:16 -0400 Subject: [PATCH 76/97] Add backup script for containers --- deploy/README.md | 69 ++++++++++++++++++++++++++++++++++++++++++++ deploy/backup_all.sh | 38 ++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100755 deploy/README.md create mode 100755 deploy/backup_all.sh diff --git a/deploy/README.md b/deploy/README.md new file mode 100755 index 0000000..21a92d1 --- /dev/null +++ b/deploy/README.md @@ -0,0 +1,69 @@ +##Backing up Docker Containers + +To backup (take a snapshot) of all active containers, run the following: + +```bash +./backup_all.sh +``` + +Please note that if an entry exists in /tmp/DOCKER_BACKUP matching 
**_CONTAINER_ID:HOST_NAME:DATE_**, +the backup will be **SKIPPED**. + +```bash +Backing up-> d064174dd7ad:elasticsearch-worker3:172.17.0.10 +sudo docker commit d064174dd7ad htaox/elasticsearch-worker3:20141001 + +Backing up-> e22c4107ff23:elasticsearch-worker2:172.17.0.9 +sudo docker commit e22c4107ff23 htaox/elasticsearch-worker2:20141001 + +Backing up-> bf015a0f095d:elasticsearch-worker1:172.17.0.8 +sudo docker commit bf015a0f095d htaox/elasticsearch-worker1:20141001 + +Backing up-> a6bc7b26ed74:elasticsearch-master:172.17.0.7 +sudo docker commit a6bc7b26ed74 htaox/elasticsearch-master:20141001 + +Backing up-> 8b0d762224b4:hbase-worker3:172.17.0.6 +sudo docker commit 8b0d762224b4 htaox/hbase-worker3:20141001 + +Backing up-> 6f569653b398:hbase-worker2:172.17.0.5 +sudo docker commit 6f569653b398 htaox/hbase-worker2:20141001 + +Backing up-> 4ecd6675e6f3:hbase-worker1:172.17.0.4 +sudo docker commit 4ecd6675e6f3 htaox/hbase-worker1:20141001 + +Backing up-> 7a8b7d479c89:hbase-master:172.17.0.3 +sudo docker commit 7a8b7d479c89 htaox/hbase-master:20141001 + +Backing up-> 4c83f2aa7918:nameserver:172.17.0.2 +sudo docker commit 4c83f2aa7918 htaox/nameserver:20141001 +``` + +```bash +$ cat /tmp/DOCKER_BACKUP + +d064174dd7ad:elasticsearch-worker3:172.17.0.10 +e22c4107ff23:elasticsearch-worker2:172.17.0.9 +bf015a0f095d:elasticsearch-worker1:172.17.0.8 +a6bc7b26ed74:elasticsearch-master:172.17.0.7 +8b0d762224b4:hbase-worker3:172.17.0.6 +6f569653b398:hbase-worker2:172.17.0.5 +4ecd6675e6f3:hbase-worker1:172.17.0.4 +7a8b7d479c89:hbase-master:172.17.0.3 +4c83f2aa7918:nameserver:172.17.0.2 +``` + +```bash +$ sudo docker images + +REPOSITORY TAG IMAGE ID CREATED VIRTUAL SIZE +htaox/nameserver 20141001 316e4ba56e2d 3 minutes ago 205.8 MB +htaox/hbase-master 20141001 e82e9fad6d5d 3 minutes ago 991.8 MB +htaox/hbase-worker1 20141001 d2887870d6ab 3 minutes ago 1.038 GB +htaox/hbase-worker2 20141001 4c885a47f030 5 minutes ago 6.915 GB +htaox/hbase-worker3 20141001 d00af05694a1 6 minutes 
ago 6.672 GB +htaox/elasticsearch-master 20141001 a833c55e5774 6 minutes ago 923.7 MB +htaox/elasticsearch-worker1 20141001 f7715f662858 9 minutes ago 11.63 GB +htaox/elasticsearch-worker2 20141001 571861378dc8 11 minutes ago 11.11 GB +htaox/elasticsearch-worker3 20141001 8afd67c785e3 15 minutes ago 17.68 GB + +``` \ No newline at end of file diff --git a/deploy/backup_all.sh b/deploy/backup_all.sh new file mode 100755 index 0000000..42b593e --- /dev/null +++ b/deploy/backup_all.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +#Back up all active containers (ie snapshot) +#Will use the date as the TAG +CONTAINERS=$(sudo docker ps | awk '{print $1}' | tail -n +2) +DNSMASQ=$(cat $(cat /tmp/DNSMASQ)) +MAINTAINER=htaox +TAG=`date +%Y%m%d` +LOGFILE=/tmp/DOCKER_BACKUP + +while read CID +do + # Get IP for each container + IP=$(sudo docker inspect --format '{{ .NetworkSettings.IPAddress }}' ${CID}) + # Find the corresponding hostname for the IP + for ENTRY in $DNSMASQ + do + MATCH=$(echo $ENTRY | grep $IP) + if [ ! -z $MATCH ] + then + IFS='/' read -ra ADDR <<< "$MATCH" + #name of running container ie hbase-master, hbase-worker1, etc + IMAGE="${ADDR[1]}" + + COMMIT="${CID}:${ADDR[1]}:${IP}" + COMMITTED=$(cat $LOGFILE | grep "${COMMIT}") + if [ ! 
-z $COMMITED ] || [[ "$COMMITED" =~ ".*No\ such\ file.*" ]] + then + echo "Backing up-> ${COMMIT}" + # log the backup + echo $COMMIT >> $LOGFILE + sudo docker commit "${CID}" "${MAINTAINER}/${IMAGE}:${TAG}" + else + echo "Backup already exist-> ${COMMIT}" + fi + fi + done +done <<<"$CONTAINERS" \ No newline at end of file From ca1627be188e8afc4be2ec36e74ea3c6a2867624 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 16:39:22 -0500 Subject: [PATCH 77/97] Add vista-ewdjs-0.0.1 --- vista-ewdjs-0.0.1/Dockerfile | 26 + vista-ewdjs-0.0.1/README.html | 828 ++++++++++++++++++ vista-ewdjs-0.0.1/README.md | 31 + vista-ewdjs-0.0.1/build | 4 + vista-ewdjs-0.0.1/files/authorized_keys | 1 + .../files/configure_vista_ewdjs.sh | 27 + vista-ewdjs-0.0.1/files/default_cmd | 14 + vista-ewdjs-0.0.1/files/id_rsa | 27 + 8 files changed, 958 insertions(+) create mode 100755 vista-ewdjs-0.0.1/Dockerfile create mode 100755 vista-ewdjs-0.0.1/README.html create mode 100755 vista-ewdjs-0.0.1/README.md create mode 100755 vista-ewdjs-0.0.1/build create mode 100755 vista-ewdjs-0.0.1/files/authorized_keys create mode 100755 vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh create mode 100755 vista-ewdjs-0.0.1/files/default_cmd create mode 100755 vista-ewdjs-0.0.1/files/id_rsa diff --git a/vista-ewdjs-0.0.1/Dockerfile b/vista-ewdjs-0.0.1/Dockerfile new file mode 100755 index 0000000..03dc629 --- /dev/null +++ b/vista-ewdjs-0.0.1/Dockerfile @@ -0,0 +1,26 @@ +# Base Ubuntu Precise 12.04 LTS image +# Based on: http://robtweed.wordpress.com/2014/03/02/ewd-js-and-vista-lets-get-started/ +FROM ubuntu:precise +MAINTAINER htaox htaox@hotmail.com + +# Setup a volume for data +VOLUME ["/data"] + +# Set correct source list +RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list + +# install a few other useful packages +RUN apt-get update && apt-get upgrade -y && apt-get install -y git + +# Docker messes up /etc/hosts and adds two entries for 127.0.0.1 +# we try 
to recover from that by giving /etc/resolv.conf and therefore +# the nameserver priority +RUN sed -i s/"files dns"/"dns files"/ /etc/nsswitch.conf + +EXPOSE 8080 + +# add vistA-ewdjs config file templates +ADD files /root/vista_ewdjs_files + +RUN chmod 700 /root/vista_ewdjs_files/default_cmd +CMD ["/root/vista_ewdjs_files/default_cmd"] diff --git a/vista-ewdjs-0.0.1/README.html b/vista-ewdjs-0.0.1/README.html new file mode 100755 index 0000000..8bd8dc8 --- /dev/null +++ b/vista-ewdjs-0.0.1/README.html @@ -0,0 +1,828 @@ +README

+EWD.js and VistA

+ +

Based on the fantastic work of Robert Tweed and VistA. +Find out more

+ +

+Build the image

+ +
+sudo docker build -t="dockerfile/vista_ewdjs-0.0.1" github.com/htaox/docker-scripts/vista_ewdjs-0.0.1
+
+ +

+Deploy with persistence

+ +
+sudo mkdir -p /opt/vista_ewdjs
+docker run -d -p 27017:27017 -v /opt/vista_ewdjs:/data/db --name vista_ewdjs dockerfile/vista_ewdjs-0.0.1
+
+ +

+Inspect

+ +
+sudo bash -v "VISTA_EWDJS_IP=$(docker inspect --format {{.NetworkSettings.IPAddress}} vista-ewdjs )
+
+ +

+SSH using private key

+ +
+BASEDIR=~/docker-scripts
+MASTER_IP=$VISTA_EWDJS_IP
+chmod 400 $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa
+ssh -i $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}
+
+
\ No newline at end of file diff --git a/vista-ewdjs-0.0.1/README.md b/vista-ewdjs-0.0.1/README.md new file mode 100755 index 0000000..e6e5738 --- /dev/null +++ b/vista-ewdjs-0.0.1/README.md @@ -0,0 +1,31 @@ +EWD.js and VistA +================ + +Based on the fantastic work of Robert Tweed and VistA. +Find out [more](http://robtweed.wordpress.com/2014/03/02/ewd-js-and-vista-lets-get-started/) + +####Build the image +
+sudo docker build -t="dockerfile/vista_ewdjs-0.0.1" github.com/htaox/docker-scripts/vista_ewdjs-0.0.1
+
+ +####Deploy with persistence +
+sudo mkdir -p /opt/vista_ewdjs
+docker run -d -p 27017:27017 -v /opt/vista_ewdjs:/data/db --name vista_ewdjs dockerfile/vista_ewdjs-0.0.1
+
+ +####Inspect +
+sudo bash -v "VISTA_EWDJS_IP=$(docker inspect --format {{.NetworkSettings.IPAddress}} vista-ewdjs )
+
+ +####SSH using private key +
+BASEDIR=~/docker-scripts
+MASTER_IP=$VISTA_EWDJS_IP
+chmod 400 $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa
+ssh -i $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}
+
+ + diff --git a/vista-ewdjs-0.0.1/build b/vista-ewdjs-0.0.1/build new file mode 100755 index 0000000..9e0efc0 --- /dev/null +++ b/vista-ewdjs-0.0.1/build @@ -0,0 +1,4 @@ +rm -f files/files.hash +for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash +mv /tmp/files.hash files/files.hash +sudo docker build -t ${IMAGE_PREFIX}vista-ewdjs:0.0.1 . diff --git a/vista-ewdjs-0.0.1/files/authorized_keys b/vista-ewdjs-0.0.1/files/authorized_keys new file mode 100755 index 0000000..5fd90fc --- /dev/null +++ b/vista-ewdjs-0.0.1/files/authorized_keys @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDp2atNK3bux0z3d2Aojkl231Lf6X7HZUYIBt3XzUs+wnTzzB/eH2ubS5Wdwyy5daA4itsvX6hI1o/LQOfRBdjXqIVl+IFXFdwNQ0saCSNh65O2ynuMwsxUXhBJAGoBg6sTXq1ZPNQk1JqopUBP6+H4jpnKFW3JosON9QopQdkkYIz/frHs3HojfbydQesGNovanKrGYV3QeFVQDPxseufRZtHjrTk1hQ3FEayQCTyqJ8JDE6DMrirNEVBTuuNZ/Z2afPLWcZIKQ46E73p9HhqcaWEph6xQ3Ha/WV9oK0jenfz4b+sGrUItTbzuP8SsUiA4yZrZaN4BubDi4oPALOr/ root@423e412aa505 diff --git a/vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh b/vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh new file mode 100755 index 0000000..64b03bd --- /dev/null +++ b/vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Add Fix for Selinux sshd issue if host is RHEL 6 +# re: https://groups.google.com/forum/#!topic/docker-user/73AiwlZEgY4 +function update_selinux() { + wget http://mirrors.kernel.org/ubuntu/pool/main/libs/libselinux/libselinux1_2.1.13-2_amd64.deb && dpkg --install libselinux1_2.1.13-2_amd64.deb +} + +function create_vista_ewdjs_directories() { + rm -rf /root/.ssh + mkdir /root/.ssh + chmod go-rx /root/.ssh + mkdir /var/run/sshd +} + +function deploy_vista_ewdjs_files() { + cp /root/vista_ewdjs_files/id_rsa /root/.ssh + chmod go-rwx /root/.ssh/id_rsa + cp /root/vista_ewdjs_files/authorized_keys /root/.ssh/authorized_keys + chmod go-wx /root/.ssh/authorized_keys +} + +function prepare_vista_ewdjs() { + 
update_selinux + create_vista_ewdjs_directories + deploy_vista_ewdjs_files +} diff --git a/vista-ewdjs-0.0.1/files/default_cmd b/vista-ewdjs-0.0.1/files/default_cmd new file mode 100755 index 0000000..b46466e --- /dev/null +++ b/vista-ewdjs-0.0.1/files/default_cmd @@ -0,0 +1,14 @@ +#!/bin/bash + +env + +source /root/vista_ewdjs_files/configure_vista_ewdjs.sh + +echo "preparing VistA EWD.js" +prepare_vista_ewdjs + + + + + + diff --git a/vista-ewdjs-0.0.1/files/id_rsa b/vista-ewdjs-0.0.1/files/id_rsa new file mode 100755 index 0000000..0cba776 --- /dev/null +++ b/vista-ewdjs-0.0.1/files/id_rsa @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEA6dmrTSt27sdM93dgKI5Jdt9S3+l+x2VGCAbd181LPsJ088wf +3h9rm0uVncMsuXWgOIrbL1+oSNaPy0Dn0QXY16iFZfiBVxXcDUNLGgkjYeuTtsp7 +jMLMVF4QSQBqAYOrE16tWTzUJNSaqKVAT+vh+I6ZyhVtyaLDjfUKKUHZJGCM/36x +7Nx6I328nUHrBjaL2pyqxmFd0HhVUAz8bHrn0WbR4605NYUNxRGskAk8qifCQxOg +zK4qzRFQU7rjWf2dmnzy1nGSCkOOhO96fR4anGlhKYesUNx2v1lfaCtI3p38+G/r +Bq1CLU287j/ErFIgOMma2WjeAbmw4uKDwCzq/wIDAQABAoIBAQCBgFZZ/Pj3EI2x ++XzZ2LocR144u7DGsXHP3iWabYj+72ce3+rB8np/3KK1ZDFvXxFkXpk1Ke8irxeg +gogd+/PysdN1/eF6nZNoEN0VRPxALNp3frhe4j2PdyvjkYQi5IynxGWRJpuA7e/b +9u+fksxn/mhyPd23rRhIk+uVn26lsnccHhCkfqr+Szm/xFsTUhYQ1B8bfrqhA1Le +WRrBa03JXocd2y3TdzeaQ+AtvbpAy9Fc28N7xkDsuh+H1y74jRhFzBXd4WnYuxze +/PAD3hpgtCDGGnGpwE2SMM8fZJ7vLOPAsMUuz1tvLbKcoTTdaUw4fBur/XQHloW7 +k7adoW6BAoGBAP0bdE1uynnwZOFDhmpMvdYfodwlv3Far+QZwVroSa64YWBaeAef +v0AO75p/EiQJEGWB9bgOAyrbOFdRqLtUF14lQw4ZLUV7sQu/o2Z0sVMSRCVWuNDf +W8sk74RtH3WB7lutOMP3WyYopOUZtTK1rZrRNxD4+edq7+utAba+DLS/AoGBAOyF +31hype9DkOHgD/jWU7tNrJprLkNkSHe/Aq5JdKesgw84AOSKO4W1/uXOly4VOt6Z +54eeW1gt+uKT292GEl66TO8PIxszfsUzpYpTKkSzrl5OsM9hUlitJwpff/D9Mbxw +fZWt0EjKlBQWc83sMBwCe8ZyNh/WueBIKH5HjhnBAoGAEwFRvVK5X2iemo+Qc0Dp +7D8Zz0cCVgeiN3V7oFDa34S2wx5n7uKe4Ld+ZFJwUUZg9c5JXhWnRTuKwnu+OLq6 +unX/z/ox/Qqpo6EzKslOW1d+yHL3k6+B3AIc/guXliI4fKfIIGbdcEMTBqTkhzc/ +HuXgxaR8V1UfSMoH2+nvWE8CgYAcw4MP3JF1cYATGA6ZMmdoZd/Rv6sWowF1HpOS 
+4nf/VCl0Fll1caIfdqyTAfa8sfRA0fKoOYfeR2k1WMnqPL3LK1jj0bFxQ2ftT4SY +N9jyFe/kpCk4bxt2kUgoKMkEY6ZCxmNfao3j7E7pynk217xaC6tFzOnsIU7liaDz +CnyrgQKBgQDtjairs6ehaqRu8Uk44gQoNIlReJ8qp7YmfPlK8ylFNTALs37c4308 +Qbjp+jLt7w+XMYnNaZPSNN1mt6EyWFSqUc+5QbfQpbw1cZRI1UBIQDwJjZUS04Ou +H75Rif72nQxHh9Ly5CMNCEyioin7kq945vQbyAwyEr7+tomhUZaq9g== +-----END RSA PRIVATE KEY----- From 9bd735007dd42cb7561aa389804ce30bde1c485c Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 16:41:11 -0500 Subject: [PATCH 78/97] Remove README.html --- vista-ewdjs-0.0.1/README.html | 828 ---------------------------------- 1 file changed, 828 deletions(-) delete mode 100755 vista-ewdjs-0.0.1/README.html diff --git a/vista-ewdjs-0.0.1/README.html b/vista-ewdjs-0.0.1/README.html deleted file mode 100755 index 8bd8dc8..0000000 --- a/vista-ewdjs-0.0.1/README.html +++ /dev/null @@ -1,828 +0,0 @@ -README

-EWD.js and VistA

- -

Based on the fantastic work of Robert Tweed and VistA. -Find out more

- -

-Build the image

- -
-sudo docker build -t="dockerfile/vista_ewdjs-0.0.1" github.com/htaox/docker-scripts/vista_ewdjs-0.0.1
-
- -

-Deploy with persistence

- -
-sudo mkdir -p /opt/vista_ewdjs
-docker run -d -p 27017:27017 -v /opt/vista_ewdjs:/data/db --name vista_ewdjs dockerfile/vista_ewdjs-0.0.1
-
- -

-Inspect

- -
-sudo bash -v "VISTA_EWDJS_IP=$(docker inspect --format {{.NetworkSettings.IPAddress}} vista-ewdjs )
-
- -

-SSH using private key

- -
-BASEDIR=~/docker-scripts
-MASTER_IP=$VISTA_EWDJS_IP
-chmod 400 $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa
-ssh -i $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}
-
-
\ No newline at end of file From 1d5fd797f5406ab8122d9e87f0ea3823c2385759 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 16:42:53 -0500 Subject: [PATCH 79/97] Update README.md --- vista-ewdjs-0.0.1/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vista-ewdjs-0.0.1/README.md b/vista-ewdjs-0.0.1/README.md index e6e5738..fc1d4cc 100755 --- a/vista-ewdjs-0.0.1/README.md +++ b/vista-ewdjs-0.0.1/README.md @@ -6,13 +6,13 @@ Find out [more](http://robtweed.wordpress.com/2014/03/02/ewd-js-and-vista-lets-g ####Build the image
-sudo docker build -t="dockerfile/vista_ewdjs-0.0.1" github.com/htaox/docker-scripts/vista_ewdjs-0.0.1
+sudo docker build -t="htaox/vista_ewdjs-0.0.1" github.com/htaox/docker-scripts/tree/add-hbase/vista-ewdjs-0.0.1
 
####Deploy with persistence
 sudo mkdir -p /opt/vista_ewdjs
-docker run -d -p 27017:27017 -v /opt/vista_ewdjs:/data/db --name vista_ewdjs dockerfile/vista_ewdjs-0.0.1
+docker run -d -p 27017:27017 -v /opt/vista_ewdjs:/data/db --name vista_ewdjs htaox/vista_ewdjs-0.0.1
 
####Inspect From 40184d978130e5d8249ef317cee6970924a56da2 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 16:51:34 -0500 Subject: [PATCH 80/97] Add git-core dependencies --- vista-ewdjs-0.0.1/Dockerfile | 4 +++- vista-ewdjs-0.0.1/README.md | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/vista-ewdjs-0.0.1/Dockerfile b/vista-ewdjs-0.0.1/Dockerfile index 03dc629..4bbbcd1 100755 --- a/vista-ewdjs-0.0.1/Dockerfile +++ b/vista-ewdjs-0.0.1/Dockerfile @@ -10,7 +10,9 @@ VOLUME ["/data"] RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list # install a few other useful packages -RUN apt-get update && apt-get upgrade -y && apt-get install -y git +RUN apt-get update && apt-get upgrade -y +RUN apt-get install -y libcurl4-gnutls-dev libexpat1-dev gettext libz-dev libssl-dev build-essential +RUN apt-get install -y git-core # Docker messes up /etc/hosts and adds two entries for 127.0.0.1 # we try to recover from that by giving /etc/resolv.conf and therefore diff --git a/vista-ewdjs-0.0.1/README.md b/vista-ewdjs-0.0.1/README.md index fc1d4cc..81dd4e3 100755 --- a/vista-ewdjs-0.0.1/README.md +++ b/vista-ewdjs-0.0.1/README.md @@ -6,7 +6,11 @@ Find out [more](http://robtweed.wordpress.com/2014/03/02/ewd-js-and-vista-lets-g ####Build the image
-sudo docker build -t="htaox/vista_ewdjs-0.0.1" github.com/htaox/docker-scripts/tree/add-hbase/vista-ewdjs-0.0.1
+git clone https://github.com/htaox/docker-scripts.git
+cd docker_scripts
+cd vista-ewdjs-0.0.1
+IMAGE_PREFIX=htaox/
+sudo ./build
 
####Deploy with persistence From 70afbc60804c7a27f74bf8c6ffc0528a0254cab2 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 16:55:48 -0500 Subject: [PATCH 81/97] Add git-core dependencies --- vista-ewdjs-0.0.1/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vista-ewdjs-0.0.1/Dockerfile b/vista-ewdjs-0.0.1/Dockerfile index 4bbbcd1..91e67d4 100755 --- a/vista-ewdjs-0.0.1/Dockerfile +++ b/vista-ewdjs-0.0.1/Dockerfile @@ -11,7 +11,7 @@ RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt # install a few other useful packages RUN apt-get update && apt-get upgrade -y -RUN apt-get install -y libcurl4-gnutls-dev libexpat1-dev gettext libz-dev libssl-dev build-essential +##RUN apt-get install -y libcurl4-gnutls-dev libexpat1-dev gettext libz-dev libssl-dev build-essential RUN apt-get install -y git-core # Docker messes up /etc/hosts and adds two entries for 127.0.0.1 From 545af01adcc9795c54244495032fc940f6fe1cb5 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 16:57:50 -0500 Subject: [PATCH 82/97] Add git-core dependencies --- vista-ewdjs-0.0.1/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vista-ewdjs-0.0.1/Dockerfile b/vista-ewdjs-0.0.1/Dockerfile index 91e67d4..fc6033c 100755 --- a/vista-ewdjs-0.0.1/Dockerfile +++ b/vista-ewdjs-0.0.1/Dockerfile @@ -12,7 +12,7 @@ RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt # install a few other useful packages RUN apt-get update && apt-get upgrade -y ##RUN apt-get install -y libcurl4-gnutls-dev libexpat1-dev gettext libz-dev libssl-dev build-essential -RUN apt-get install -y git-core +RUN apt-get install -y git # Docker messes up /etc/hosts and adds two entries for 127.0.0.1 # we try to recover from that by giving /etc/resolv.conf and therefore From 5316ce9817bd77dc7630774113eba24a0ac51d5b Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 17:03:18 -0500 Subject: [PATCH 83/97] Add git-core 
dependencies --- vista-ewdjs-0.0.1/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vista-ewdjs-0.0.1/Dockerfile b/vista-ewdjs-0.0.1/Dockerfile index fc6033c..cd2f575 100755 --- a/vista-ewdjs-0.0.1/Dockerfile +++ b/vista-ewdjs-0.0.1/Dockerfile @@ -10,8 +10,9 @@ VOLUME ["/data"] RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list # install a few other useful packages -RUN apt-get update && apt-get upgrade -y -##RUN apt-get install -y libcurl4-gnutls-dev libexpat1-dev gettext libz-dev libssl-dev build-essential +RUN apt-get -y update +RUN apt-get upgrade -y + RUN apt-get install -y git # Docker messes up /etc/hosts and adds two entries for 127.0.0.1 From 1f7339f00f83e448f707306054694d6c331fc08b Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 9 Nov 2014 17:05:45 -0500 Subject: [PATCH 84/97] Add git-core dependencies --- vista-ewdjs-0.0.1/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vista-ewdjs-0.0.1/Dockerfile b/vista-ewdjs-0.0.1/Dockerfile index cd2f575..59814f9 100755 --- a/vista-ewdjs-0.0.1/Dockerfile +++ b/vista-ewdjs-0.0.1/Dockerfile @@ -1,13 +1,14 @@ # Base Ubuntu Precise 12.04 LTS image # Based on: http://robtweed.wordpress.com/2014/03/02/ewd-js-and-vista-lets-get-started/ -FROM ubuntu:precise +FROM ubuntu:12.04 MAINTAINER htaox htaox@hotmail.com # Setup a volume for data VOLUME ["/data"] # Set correct source list -RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list +RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list && \ + echo 'deb http://archive.ubuntu.com/ubuntu precise-updates universe' >> /etc/apt/sources.list # install a few other useful packages RUN apt-get -y update From 766e30b72bd35caaa8ec4e065402201980cd2ea5 Mon Sep 17 00:00:00 2001 From: htaox Date: Sat, 14 Feb 2015 13:14:53 -0500 Subject: [PATCH 85/97] Remove vista-ewdjs --- vista-ewdjs-0.0.1/Dockerfile 
| 30 ---------------- vista-ewdjs-0.0.1/README.md | 35 ------------------- vista-ewdjs-0.0.1/build | 4 --- vista-ewdjs-0.0.1/files/authorized_keys | 1 - .../files/configure_vista_ewdjs.sh | 27 -------------- vista-ewdjs-0.0.1/files/default_cmd | 14 -------- vista-ewdjs-0.0.1/files/id_rsa | 27 -------------- 7 files changed, 138 deletions(-) delete mode 100755 vista-ewdjs-0.0.1/Dockerfile delete mode 100755 vista-ewdjs-0.0.1/README.md delete mode 100755 vista-ewdjs-0.0.1/build delete mode 100755 vista-ewdjs-0.0.1/files/authorized_keys delete mode 100755 vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh delete mode 100755 vista-ewdjs-0.0.1/files/default_cmd delete mode 100755 vista-ewdjs-0.0.1/files/id_rsa diff --git a/vista-ewdjs-0.0.1/Dockerfile b/vista-ewdjs-0.0.1/Dockerfile deleted file mode 100755 index 59814f9..0000000 --- a/vista-ewdjs-0.0.1/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -# Base Ubuntu Precise 12.04 LTS image -# Based on: http://robtweed.wordpress.com/2014/03/02/ewd-js-and-vista-lets-get-started/ -FROM ubuntu:12.04 -MAINTAINER htaox htaox@hotmail.com - -# Setup a volume for data -VOLUME ["/data"] - -# Set correct source list -RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list && \ - echo 'deb http://archive.ubuntu.com/ubuntu precise-updates universe' >> /etc/apt/sources.list - -# install a few other useful packages -RUN apt-get -y update -RUN apt-get upgrade -y - -RUN apt-get install -y git - -# Docker messes up /etc/hosts and adds two entries for 127.0.0.1 -# we try to recover from that by giving /etc/resolv.conf and therefore -# the nameserver priority -RUN sed -i s/"files dns"/"dns files"/ /etc/nsswitch.conf - -EXPOSE 8080 - -# add vistA-ewdjs config file templates -ADD files /root/vista_ewdjs_files - -RUN chmod 700 /root/vista_ewdjs_files/default_cmd -CMD ["/root/vista_ewdjs_files/default_cmd"] diff --git a/vista-ewdjs-0.0.1/README.md b/vista-ewdjs-0.0.1/README.md deleted file mode 100755 index 
81dd4e3..0000000 --- a/vista-ewdjs-0.0.1/README.md +++ /dev/null @@ -1,35 +0,0 @@ -EWD.js and VistA -================ - -Based on the fantastic work of Robert Tweed and VistA. -Find out [more](http://robtweed.wordpress.com/2014/03/02/ewd-js-and-vista-lets-get-started/) - -####Build the image -
-git clone https://github.com/htaox/docker-scripts.git
-cd docker_scripts
-cd vista-ewdjs-0.0.1
-IMAGE_PREFIX=htaox/
-sudo ./build
-
- -####Deploy with persistence -
-sudo mkdir -p /opt/vista_ewdjs
-docker run -d -p 27017:27017 -v /opt/vista_ewdjs:/data/db --name vista_ewdjs htaox/vista_ewdjs-0.0.1
-
- -####Inspect -
-sudo bash -v "VISTA_EWDJS_IP=$(docker inspect --format {{.NetworkSettings.IPAddress}} vista-ewdjs )
-
- -####SSH using private key -
-BASEDIR=~/docker-scripts
-MASTER_IP=$VISTA_EWDJS_IP
-chmod 400 $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa
-ssh -i $BASEDIR/vista_ewdjs-0.0.1/files/id_rsa -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@${MASTER_IP}
-
- - diff --git a/vista-ewdjs-0.0.1/build b/vista-ewdjs-0.0.1/build deleted file mode 100755 index 9e0efc0..0000000 --- a/vista-ewdjs-0.0.1/build +++ /dev/null @@ -1,4 +0,0 @@ -rm -f files/files.hash -for i in `find . -type f | sed s/"\.\/"//`; do git hash-object $i | tr -d '\n'; echo -e "\t$i"; done > /tmp/files.hash -mv /tmp/files.hash files/files.hash -sudo docker build -t ${IMAGE_PREFIX}vista-ewdjs:0.0.1 . diff --git a/vista-ewdjs-0.0.1/files/authorized_keys b/vista-ewdjs-0.0.1/files/authorized_keys deleted file mode 100755 index 5fd90fc..0000000 --- a/vista-ewdjs-0.0.1/files/authorized_keys +++ /dev/null @@ -1 +0,0 @@ -ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDp2atNK3bux0z3d2Aojkl231Lf6X7HZUYIBt3XzUs+wnTzzB/eH2ubS5Wdwyy5daA4itsvX6hI1o/LQOfRBdjXqIVl+IFXFdwNQ0saCSNh65O2ynuMwsxUXhBJAGoBg6sTXq1ZPNQk1JqopUBP6+H4jpnKFW3JosON9QopQdkkYIz/frHs3HojfbydQesGNovanKrGYV3QeFVQDPxseufRZtHjrTk1hQ3FEayQCTyqJ8JDE6DMrirNEVBTuuNZ/Z2afPLWcZIKQ46E73p9HhqcaWEph6xQ3Ha/WV9oK0jenfz4b+sGrUItTbzuP8SsUiA4yZrZaN4BubDi4oPALOr/ root@423e412aa505 diff --git a/vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh b/vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh deleted file mode 100755 index 64b03bd..0000000 --- a/vista-ewdjs-0.0.1/files/configure_vista_ewdjs.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -# Add Fix for Selinux sshd issue if host is RHEL 6 -# re: https://groups.google.com/forum/#!topic/docker-user/73AiwlZEgY4 -function update_selinux() { - wget http://mirrors.kernel.org/ubuntu/pool/main/libs/libselinux/libselinux1_2.1.13-2_amd64.deb && dpkg --install libselinux1_2.1.13-2_amd64.deb -} - -function create_vista_ewdjs_directories() { - rm -rf /root/.ssh - mkdir /root/.ssh - chmod go-rx /root/.ssh - mkdir /var/run/sshd -} - -function deploy_vista_ewdjs_files() { - cp /root/vista_ewdjs_files/id_rsa /root/.ssh - chmod go-rwx /root/.ssh/id_rsa - cp /root/vista_ewdjs_files/authorized_keys /root/.ssh/authorized_keys - chmod go-wx /root/.ssh/authorized_keys -} - -function prepare_vista_ewdjs() 
{ - update_selinux - create_vista_ewdjs_directories - deploy_vista_ewdjs_files -} diff --git a/vista-ewdjs-0.0.1/files/default_cmd b/vista-ewdjs-0.0.1/files/default_cmd deleted file mode 100755 index b46466e..0000000 --- a/vista-ewdjs-0.0.1/files/default_cmd +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -env - -source /root/vista_ewdjs_files/configure_vista_ewdjs.sh - -echo "preparing VistA EWD.js" -prepare_vista_ewdjs - - - - - - diff --git a/vista-ewdjs-0.0.1/files/id_rsa b/vista-ewdjs-0.0.1/files/id_rsa deleted file mode 100755 index 0cba776..0000000 --- a/vista-ewdjs-0.0.1/files/id_rsa +++ /dev/null @@ -1,27 +0,0 @@ ------BEGIN RSA PRIVATE KEY----- -MIIEpAIBAAKCAQEA6dmrTSt27sdM93dgKI5Jdt9S3+l+x2VGCAbd181LPsJ088wf -3h9rm0uVncMsuXWgOIrbL1+oSNaPy0Dn0QXY16iFZfiBVxXcDUNLGgkjYeuTtsp7 -jMLMVF4QSQBqAYOrE16tWTzUJNSaqKVAT+vh+I6ZyhVtyaLDjfUKKUHZJGCM/36x -7Nx6I328nUHrBjaL2pyqxmFd0HhVUAz8bHrn0WbR4605NYUNxRGskAk8qifCQxOg -zK4qzRFQU7rjWf2dmnzy1nGSCkOOhO96fR4anGlhKYesUNx2v1lfaCtI3p38+G/r -Bq1CLU287j/ErFIgOMma2WjeAbmw4uKDwCzq/wIDAQABAoIBAQCBgFZZ/Pj3EI2x -+XzZ2LocR144u7DGsXHP3iWabYj+72ce3+rB8np/3KK1ZDFvXxFkXpk1Ke8irxeg -gogd+/PysdN1/eF6nZNoEN0VRPxALNp3frhe4j2PdyvjkYQi5IynxGWRJpuA7e/b -9u+fksxn/mhyPd23rRhIk+uVn26lsnccHhCkfqr+Szm/xFsTUhYQ1B8bfrqhA1Le -WRrBa03JXocd2y3TdzeaQ+AtvbpAy9Fc28N7xkDsuh+H1y74jRhFzBXd4WnYuxze -/PAD3hpgtCDGGnGpwE2SMM8fZJ7vLOPAsMUuz1tvLbKcoTTdaUw4fBur/XQHloW7 -k7adoW6BAoGBAP0bdE1uynnwZOFDhmpMvdYfodwlv3Far+QZwVroSa64YWBaeAef -v0AO75p/EiQJEGWB9bgOAyrbOFdRqLtUF14lQw4ZLUV7sQu/o2Z0sVMSRCVWuNDf -W8sk74RtH3WB7lutOMP3WyYopOUZtTK1rZrRNxD4+edq7+utAba+DLS/AoGBAOyF -31hype9DkOHgD/jWU7tNrJprLkNkSHe/Aq5JdKesgw84AOSKO4W1/uXOly4VOt6Z -54eeW1gt+uKT292GEl66TO8PIxszfsUzpYpTKkSzrl5OsM9hUlitJwpff/D9Mbxw -fZWt0EjKlBQWc83sMBwCe8ZyNh/WueBIKH5HjhnBAoGAEwFRvVK5X2iemo+Qc0Dp -7D8Zz0cCVgeiN3V7oFDa34S2wx5n7uKe4Ld+ZFJwUUZg9c5JXhWnRTuKwnu+OLq6 -unX/z/ox/Qqpo6EzKslOW1d+yHL3k6+B3AIc/guXliI4fKfIIGbdcEMTBqTkhzc/ -HuXgxaR8V1UfSMoH2+nvWE8CgYAcw4MP3JF1cYATGA6ZMmdoZd/Rv6sWowF1HpOS 
-4nf/VCl0Fll1caIfdqyTAfa8sfRA0fKoOYfeR2k1WMnqPL3LK1jj0bFxQ2ftT4SY -N9jyFe/kpCk4bxt2kUgoKMkEY6ZCxmNfao3j7E7pynk217xaC6tFzOnsIU7liaDz -CnyrgQKBgQDtjairs6ehaqRu8Uk44gQoNIlReJ8qp7YmfPlK8ylFNTALs37c4308 -Qbjp+jLt7w+XMYnNaZPSNN1mt6EyWFSqUc+5QbfQpbw1cZRI1UBIQDwJjZUS04Ou -H75Rif72nQxHh9Ly5CMNCEyioin7kq945vQbyAwyEr7+tomhUZaq9g== ------END RSA PRIVATE KEY----- From 5c7814482ab64f68679985882f82653fee2d5a24 Mon Sep 17 00:00:00 2001 From: htaox Date: Thu, 23 Apr 2015 15:53:24 -0400 Subject: [PATCH 86/97] Create separate host directories for each worker volume --- deploy/start_elasticsearch_cluster.sh | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 4ba59a8..0929846 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -11,6 +11,7 @@ WORKER_HOSTNAME=elasticsearch-worker # starts the elasticsearch master container function start_master() { echo "starting master container" + if [ "$DEBUG" -gt 0 ]; then echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 fi @@ -33,13 +34,28 @@ function start_workers() { rm -f $ELASTICSERVERS + # split the volume syntax by :, then use the array to build new volume map + IFS=' ' read -ra VOLUME_MAP_ARR_PRE <<< "$VOLUME_MAP" + IFS=':' read -ra VOLUME_MAP_ARR <<< "${VOLUME_MAP_ARR_PRE[1]}" + for i in `seq 1 $NUM_WORKERS`; do echo "starting worker container" hostname="${WORKER_HOSTNAME}${i}${DOMAINNAME}" + # rename $VOLUME_MAP by adding worker number as suffix if it is not empty + WORKER_VOLUME_MAP=$VOLUME_MAP + if [ "$VOLUME_MAP" ]; then + WORKER_VOLUME_DIR="${VOLUME_MAP_ARR[0]}-${i}" + echo "Creating directory ${WORKER_VOLUME_DIR}" + mkdir -p "${WORKER_VOLUME_DIR}" + # volume will now be like /host/dir/data-1:/data if original volume was /home/dir/data + WORKER_VOLUME_MAP="-v ${WORKER_VOLUME_DIR}:${VOLUME_MAP_ARR[1]}" + fi + echo "WORKER ${i} 
VOLUME_MAP => ${WORKER_VOLUME_MAP}" + if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2 + echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 fi - WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $VOLUME_MAP $1:$2) + WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) if [ "$WORKER" = "" ]; then echo "error: could not start worker container from image $1:$2" From 1ddea75f72b1742c2d558b6d5e9c9f657dafb9d1 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 26 Apr 2015 07:56:54 -0400 Subject: [PATCH 87/97] Update elasticsearch.yml so data files write to host directories correctly --- deploy/start_elasticsearch_cluster.sh | 7 +++++++ .../elasticsearch-base/files/elasticsearch.yml | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 0929846..7d26786 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -12,6 +12,13 @@ WORKER_HOSTNAME=elasticsearch-worker function start_master() { echo "starting master container" + # split the volume syntax by :, then use the array to build new volume map + IFS=' ' read -ra VOLUME_MAP_ARR_PRE <<< "$VOLUME_MAP" + IFS=':' read -ra VOLUME_MAP_ARR <<< "${VOLUME_MAP_ARR_PRE[1]}" + MASTER_VOLUME_DIR="${VOLUME_MAP_ARR[0]}" + echo "Creating directory ${MASTER_VOLUME_DIR}" + mkdir -p "${MASTER_VOLUME_DIR}" + if [ "$DEBUG" -gt 0 ]; then echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 fi diff --git a/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml b/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml index e69bed4..329d963 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml +++ b/elasticsearch-0.90.13/elasticsearch-base/files/elasticsearch.yml @@ -169,7 +169,9 @@ 
node.data: @DATA@ # # path.plugins: /path/to/plugins - +path: + logs: /data/log + data: /data/data #################################### Plugin ################################### # If a plugin listed here is not installed for current node, the node will not start. From fcb456c1da3aef837db41bbb80da7f2d5c156406 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 3 May 2015 08:05:10 -0400 Subject: [PATCH 88/97] Make sure containers restart on reboot --- deploy/start_elasticsearch_cluster.sh | 8 ++++---- deploy/start_nameserver.sh | 16 +++++++++++----- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 7d26786..41bf56b 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -20,9 +20,9 @@ function start_master() { mkdir -p "${MASTER_VOLUME_DIR}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 + echo sudo docker run -d --restart always --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 fi - MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) + MASTER=$(sudo docker run -d --restart always --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) if [ "$MASTER" = "" ]; then echo "error: could not start master container from image $1:$2" @@ -60,9 +60,9 @@ function start_workers() { echo "WORKER ${i} VOLUME_MAP => ${WORKER_VOLUME_MAP}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 + echo sudo docker run -d --restart always --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 fi - WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) + WORKER=$(sudo docker run -d --restart always --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) if [ "$WORKER" = "" ]; then echo 
"error: could not start worker container from image $1:$2" diff --git a/deploy/start_nameserver.sh b/deploy/start_nameserver.sh index 07f70c9..9d3b78b 100755 --- a/deploy/start_nameserver.sh +++ b/deploy/start_nameserver.sh @@ -8,13 +8,15 @@ BASEDIR=$(cd $(dirname $0); pwd) # starts the dnsmasq nameserver function start_nameserver() { - DNSDIR="/tmp/dnsdir_$RANDOM" + DNSDIR="/opt/docker-dns/dnsdir_$RANDOM" #DNSDIR="${BASEDIR}" DNSFILE="${DNSDIR}/0hosts" - mkdir $DNSDIR + sudo mkdir -p $DNSDIR - rm -rf /tmp/DNSMASQ - echo $DNSFILE > "/tmp/DNSMASQ" + sudo rm -rf /opt/docker-dns/DNSMASQ + sudo touch /opt/docker-dns/DNSMASQ + echo $DNSFILE | sudo tee --append /opt/docker-dns/DNSMASQ + sudo chmod 777 /opt/docker-dns/DNSMASQ echo "starting nameserver container" if [ "$DEBUG" -gt 0 ]; then @@ -30,9 +32,13 @@ function start_nameserver() { echo "started nameserver container: $NAMESERVER" echo "DNS host->IP file mapped: $DNSFILE" sleep 2 + + sudo chmod -R 777 $DNSDIR + NAMESERVER_IP=$(sudo docker logs $NAMESERVER 2>&1 | egrep '^NAMESERVER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") echo "NAMESERVER_IP: $NAMESERVER_IP" echo "address=\"/nameserver/$NAMESERVER_IP\"" > $DNSFILE + } # contact nameserver container and resolve IP address (used for checking whether nameserver has registered @@ -96,7 +102,7 @@ function check_start_nameserver() { # start_nameserver $NAMESERVER_IMAGE wait_for_nameserver else - HOSTFILE=$(cat /tmp/DNSMASQ) + HOSTFILE=$(cat /opt/docker-dns/DNSMASQ) DNSFILE=$HOSTFILE NAMESERVER_IP=$(cat $HOSTFILE | grep nameserver | grep -oE "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}") echo "NAMESERVER_IP: $NAMESERVER_IP" From cfbd4e5551f84f49bb8280e53a82b74750794bf2 Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 3 May 2015 08:15:44 -0400 Subject: [PATCH 89/97] Just restart DNS --- deploy/start_elasticsearch_cluster.sh | 8 ++++---- deploy/start_nameserver.sh | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 41bf56b..7d26786 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -20,9 +20,9 @@ function start_master() { mkdir -p "${MASTER_VOLUME_DIR}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --restart always --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 + echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 fi - MASTER=$(sudo docker run -d --restart always --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) + MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) if [ "$MASTER" = "" ]; then echo "error: could not start master container from image $1:$2" @@ -60,9 +60,9 @@ function start_workers() { echo "WORKER ${i} VOLUME_MAP => ${WORKER_VOLUME_MAP}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --restart always --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 + echo sudo docker run -d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 fi - WORKER=$(sudo docker run -d --restart always --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) + WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) if [ "$WORKER" = "" ]; then echo "error: could not start worker container from image $1:$2" diff --git a/deploy/start_nameserver.sh b/deploy/start_nameserver.sh index 9d3b78b..f649d03 100755 --- a/deploy/start_nameserver.sh +++ b/deploy/start_nameserver.sh @@ -20,9 +20,9 @@ function start_nameserver() { echo "starting nameserver container" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d -h nameserver${DOMAINNAME} -v $DNSDIR:/etc/dnsmasq.d $1 + echo sudo docker run -d --restart always -h nameserver${DOMAINNAME} -v $DNSDIR:/etc/dnsmasq.d $1 fi - NAMESERVER=$(sudo docker run -d -h nameserver${DOMAINNAME} -v $DNSDIR:/etc/dnsmasq.d $1) 
+ NAMESERVER=$(sudo docker run -d --restart always -h nameserver${DOMAINNAME} -v $DNSDIR:/etc/dnsmasq.d $1) if [ "$NAMESERVER" = "" ]; then echo "error: could not start nameserver container from image $1" From c95a034210f50802b85ad67a17195e6aa7d1037d Mon Sep 17 00:00:00 2001 From: htaox Date: Sun, 3 May 2015 16:58:55 -0400 Subject: [PATCH 90/97] Change elasticsearch restart to on-failure:10 --- deploy/start_elasticsearch_cluster.sh | 16 +++++++++++----- .../files/run_elasticsearch_master.sh | 1 + .../files/run_elasticsearch_worker.sh | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 7d26786..7882900 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -20,9 +20,9 @@ function start_master() { mkdir -p "${MASTER_VOLUME_DIR}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 + echo sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 fi - MASTER=$(sudo docker run -d --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) + MASTER=$(sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) if [ "$MASTER" = "" ]; then echo "error: could not start master container from image $1:$2" @@ -31,6 +31,9 @@ function start_master() { echo "started master container: $MASTER" sleep 3 + echo "Removing $MASTER_HOSTNAME from $DNSFILE" + sed -i "/$MASTER_HOSTNAME/d" "$DNSFILE" + MASTER_IP=$(sudo docker logs $MASTER 2>&1 | egrep '^MASTER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") echo "MASTER_IP: $MASTER_IP" echo "address=\"/$MASTER_HOSTNAME/$MASTER_IP\"" >> $DNSFILE @@ -60,9 +63,9 @@ function start_workers() { echo "WORKER ${i} VOLUME_MAP => ${WORKER_VOLUME_MAP}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run 
-d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 + echo sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 fi - WORKER=$(sudo docker run -d --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) + WORKER=$(sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) if [ "$WORKER" = "" ]; then echo "error: could not start worker container from image $1:$2" @@ -71,7 +74,10 @@ function start_workers() { echo "started worker container: $WORKER" sleep 3 - WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") + echo "Removing $hostname from $DNSFILE" + sed -i "/$hostname/d" "$DNSFILE" + + WORKER_IP=$(sudo docker logs $WORKER 2>&1 | egrep '^WORKER_IP=' | awk -F= '{print $2}' | tr -d -c "[:digit:] .") echo "address=\"/$hostname/$WORKER_IP\"" >> $DNSFILE echo "WORKER #${i} IP: $WORKER_IP" echo $WORKER_IP >> $ELASTICSERVERS diff --git a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh index 6628da9..bfa8ba7 100755 --- a/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh +++ b/elasticsearch-0.90.13/elasticsearch-master/files/run_elasticsearch_master.sh @@ -8,6 +8,7 @@ IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { p echo "MASTER_IP=$IP" sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml +# sed -i "s|^network.host:.*|network.host: $IP|" $ES_HOME/conf/elasticsearch.yml sed -i "s/@MASTER@/true/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@DATA@/false/g" $ES_HOME/conf/elasticsearch.yml diff --git a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh index 9c1a574..8f0ec66 100755 --- 
a/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh +++ b/elasticsearch-0.90.13/elasticsearch-worker/files/run_elasticsearch_worker.sh @@ -8,6 +8,7 @@ IP=$(ip -o -4 addr list eth0 | perl -n -e 'if (m{inet\s([\d\.]+)\/\d+\s}xms) { p echo "WORKER_IP=$IP" sed -i "s/@IP@/$IP/g" $ES_HOME/conf/elasticsearch.yml +#sed -i "s|^network.host:.*|network.host: $IP|" $ES_HOME/conf/elasticsearch.yml sed -i "s/@MASTER@/false/g" $ES_HOME/conf/elasticsearch.yml sed -i "s/@DATA@/true/g" $ES_HOME/conf/elasticsearch.yml From 634fb95bd8f88c80af678af4471572d1595dde65 Mon Sep 17 00:00:00 2001 From: htaox Date: Mon, 4 May 2015 17:04:43 -0400 Subject: [PATCH 91/97] Delete stopped containers before deploying --- deploy/deploy_elasticsearch.sh | 6 ++++++ deploy/start_elasticsearch_cluster.sh | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/deploy/deploy_elasticsearch.sh b/deploy/deploy_elasticsearch.sh index e48595f..eaf561e 100755 --- a/deploy/deploy_elasticsearch.sh +++ b/deploy/deploy_elasticsearch.sh @@ -80,6 +80,10 @@ function check_elasticsearch() { } +function remove_stopped_containers() { + sudo docker ps -a | grep elasticsearch | awk '{print $1}' | xargs --no-run-if-empty docker rm +} + check_root if [[ "$#" -eq 0 ]]; then @@ -105,6 +109,8 @@ if [ $NUM_ELASTIC_MASTER -gt 0 ]; then exit 0 fi +remove_stopped_containers + start_master ${image_name}-master $image_version wait_for_master diff --git a/deploy/start_elasticsearch_cluster.sh b/deploy/start_elasticsearch_cluster.sh index 7882900..51669c5 100755 --- a/deploy/start_elasticsearch_cluster.sh +++ b/deploy/start_elasticsearch_cluster.sh @@ -20,9 +20,9 @@ function start_master() { mkdir -p "${MASTER_VOLUME_DIR}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 + echo sudo docker run -d --restart no --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2 fi - 
MASTER=$(sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) + MASTER=$(sudo docker run -d --restart no --dns $NAMESERVER_IP -h ${MASTER_HOSTNAME}${DOMAINNAME} $VOLUME_MAP $1:$2) if [ "$MASTER" = "" ]; then echo "error: could not start master container from image $1:$2" @@ -63,9 +63,9 @@ function start_workers() { echo "WORKER ${i} VOLUME_MAP => ${WORKER_VOLUME_MAP}" if [ "$DEBUG" -gt 0 ]; then - echo sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 + echo sudo docker run -d --restart no --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2 fi - WORKER=$(sudo docker run -d --restart on-failure:10 --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) + WORKER=$(sudo docker run -d --restart no --dns $NAMESERVER_IP -h $hostname $WORKER_VOLUME_MAP $1:$2) if [ "$WORKER" = "" ]; then echo "error: could not start worker container from image $1:$2" From 3ddfedf29f754f78a1ce7d13b820e1b56c3abd55 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 5 May 2015 14:11:26 -0400 Subject: [PATCH 92/97] pull from docker/java:oracle-java7 --- elasticsearch-0.90.13/elasticsearch-base/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index cfdc27e..a97532b 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -5,7 +5,7 @@ # # Pull base image. -FROM dockerfile/java +FROM dockerfile/java:oracle-java7 # Install ElasticSearch. 
RUN cd /tmp && wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.tar.gz From d5dc57dca30d1bf957ff796dff8860740d37704d Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 5 May 2015 14:22:53 -0400 Subject: [PATCH 93/97] pull from htaox/oracle-java8:latest --- elasticsearch-0.90.13/elasticsearch-base/Dockerfile | 2 +- gremlin-server-3.0.0/Dockerfile | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index a97532b..d254e2d 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -5,7 +5,7 @@ # # Pull base image. -FROM dockerfile/java:oracle-java7 +FROM htaox/oracle-java8:latest # Install ElasticSearch. RUN cd /tmp && wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.tar.gz diff --git a/gremlin-server-3.0.0/Dockerfile b/gremlin-server-3.0.0/Dockerfile index 8eb2f5d..61f2f0b 100755 --- a/gremlin-server-3.0.0/Dockerfile +++ b/gremlin-server-3.0.0/Dockerfile @@ -3,11 +3,11 @@ FROM htaox/oracle-java8:latest MAINTAINER htaox htaox@hotmail.com -RUN git clone https://github.com/tinkerpop/tinkerpop3.git +RUN git clone https://github.com/apache/incubator-tinkerpop.git -RUN cd tinkerpop3 && mvn clean package -DskipTests +RUN cd incubator-tinkerpop && mvn clean package -DskipTests -RUN cp tinkerpop3/gremlin-server/target/gremlin-server-3.0.0-SNAPSHOT-distribution.zip /opt +RUN cp incubator-tinkerpop/gremlin-server/target/gremlin-server-3.0.0-SNAPSHOT-distribution.zip /opt RUN cd /opt && unzip gremlin-server-3.0.0-SNAPSHOT-distribution.zip From 0d0a830e2bc95968f7188f965d202e95c21f2fd8 Mon Sep 17 00:00:00 2001 From: htaox Date: Tue, 5 May 2015 15:18:01 -0400 Subject: [PATCH 94/97] Install sudo in base --- elasticsearch-0.90.13/elasticsearch-base/Dockerfile | 3 +++ 
oracle-java8/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile index d254e2d..e15f5aa 100755 --- a/elasticsearch-0.90.13/elasticsearch-base/Dockerfile +++ b/elasticsearch-0.90.13/elasticsearch-base/Dockerfile @@ -6,6 +6,9 @@ # Pull base image. FROM htaox/oracle-java8:latest + +RUN apt-get update +RUN apt-get install sudo # Install ElasticSearch. RUN cd /tmp && wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.13.tar.gz diff --git a/oracle-java8/Dockerfile b/oracle-java8/Dockerfile index e7869e8..60a960f 100755 --- a/oracle-java8/Dockerfile +++ b/oracle-java8/Dockerfile @@ -13,5 +13,5 @@ RUN echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true RUN apt-get install -y oracle-java8-installer ca-certificates # Install tools -run apt-get install -y git maven openssh-server +RUN apt-get install -y git maven openssh-server From f6bf5b21c4d7fcd62b0e00c103364aa8b6487c8f Mon Sep 17 00:00:00 2001 From: htaox Date: Mon, 15 Jun 2015 15:03:00 -0400 Subject: [PATCH 95/97] Use official java image --- gremlin-server-3.0.0/Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gremlin-server-3.0.0/Dockerfile b/gremlin-server-3.0.0/Dockerfile index 61f2f0b..7eaab52 100755 --- a/gremlin-server-3.0.0/Dockerfile +++ b/gremlin-server-3.0.0/Dockerfile @@ -1,8 +1,12 @@ # Base Ubuntu Precise 12.04 LTS image w/ Java 8 & Maven installed # -FROM htaox/oracle-java8:latest +# FROM htaox/oracle-java8:latest +FROM java:oracle-java8 MAINTAINER htaox htaox@hotmail.com +# Install tools +RUN apt-get install -y git maven openssh-server + RUN git clone https://github.com/apache/incubator-tinkerpop.git RUN cd incubator-tinkerpop && mvn clean package -DskipTests From 4a58159ec2112862207b1f4a99d7f6d25a9839a8 Mon Sep 17 00:00:00 2001 From: htaox Date: Mon, 15 Jun 2015 15:09:18 
-0400 Subject: [PATCH 96/97] Use official java image --- gremlin-server-3.0.0/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gremlin-server-3.0.0/Dockerfile b/gremlin-server-3.0.0/Dockerfile index 7eaab52..7717c0c 100755 --- a/gremlin-server-3.0.0/Dockerfile +++ b/gremlin-server-3.0.0/Dockerfile @@ -1,7 +1,7 @@ # Base Ubuntu Precise 12.04 LTS image w/ Java 8 & Maven installed # # FROM htaox/oracle-java8:latest -FROM java:oracle-java8 +FROM java:8u45-jdk MAINTAINER htaox htaox@hotmail.com # Install tools From 934373c6cffb98ab19afcfb710af91704058f15a Mon Sep 17 00:00:00 2001 From: htaox Date: Mon, 15 Jun 2015 15:16:27 -0400 Subject: [PATCH 97/97] Use official java image --- gremlin-server-3.0.0/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/gremlin-server-3.0.0/Dockerfile b/gremlin-server-3.0.0/Dockerfile index 7717c0c..7a19cc4 100755 --- a/gremlin-server-3.0.0/Dockerfile +++ b/gremlin-server-3.0.0/Dockerfile @@ -5,6 +5,7 @@ FROM java:8u45-jdk MAINTAINER htaox htaox@hotmail.com # Install tools +RUN apt-cache search maven RUN apt-get install -y git maven openssh-server RUN git clone https://github.com/apache/incubator-tinkerpop.git