diff --git a/Dockerfile b/Dockerfile index b1a55d9..9ce5f42 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ USER root # install dev tools RUN yum clean all; \ rpm --rebuilddb; \ - yum install -y curl which tar sudo openssh-server openssh-clients rsync + yum install -y yum-plugin-ovl curl which tar sudo openssh-server openssh-clients rsync # update libselinux. see https://github.com/sequenceiq/hadoop-docker/issues/14 RUN yum update -y libselinux @@ -22,13 +22,8 @@ RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys # java -RUN curl -LO 'http://download.oracle.com/otn-pub/java/jdk/7u71-b14/jdk-7u71-linux-x64.rpm' -H 'Cookie: oraclelicense=accept-securebackup-cookie' -RUN rpm -i jdk-7u71-linux-x64.rpm -RUN rm jdk-7u71-linux-x64.rpm - -ENV JAVA_HOME /usr/java/default -ENV PATH $PATH:$JAVA_HOME/bin -RUN rm /usr/bin/java && ln -s $JAVA_HOME/bin/java /usr/bin/java +RUN yum -y install java-1.8.0-openjdk-devel.x86_64 && yum clean all +COPY java_env.sh /etc/profile.d/java_env.sh # download native support RUN mkdir -p /tmp/native @@ -46,7 +41,7 @@ ENV HADOOP_YARN_HOME /usr/local/hadoop ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop -RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/java/default\nexport HADOOP_PREFIX=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh +RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/etc/alternatives/java_sdk\nexport HADOOP_PREFIX=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh #RUN . 
$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh @@ -61,6 +56,11 @@ ADD hdfs-site.xml $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml ADD mapred-site.xml $HADOOP_PREFIX/etc/hadoop/mapred-site.xml ADD yarn-site.xml $HADOOP_PREFIX/etc/hadoop/yarn-site.xml +# prepare tez installation (pinned release from the permanent Apache archive; curl -f keeps an HTTP error page from being piped into tar) +COPY tez-site.xml $HADOOP_PREFIX/etc/hadoop/tez-site.xml +RUN mkdir -p /root/tez +RUN curl -fsSL http://archive.apache.org/dist/tez/0.8.5/apache-tez-0.8.5-bin.tar.gz | tar -xz -C /root/tez + RUN $HADOOP_PREFIX/bin/hdfs namenode -format # fixing the libhadoop.so like a boss diff --git a/README.md b/README.md index 1d7cace..00bab86 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,20 @@ -# Apache Hadoop 2.7.1 Docker image +[![Docker Build Status](https://img.shields.io/docker/build/ouyi/hadoop-docker.svg)](https://hub.docker.com/r/ouyi/hadoop-docker/) +[![Docker Automated build](https://img.shields.io/docker/automated/ouyi/hadoop-docker.svg)](https://hub.docker.com/r/ouyi/hadoop-docker/) -[![DockerPulls](https://img.shields.io/docker/pulls/sequenceiq/hadoop-docker.svg)](https://registry.hub.docker.com/u/sequenceiq/hadoop-docker/) -[![DockerStars](https://img.shields.io/docker/stars/sequenceiq/hadoop-docker.svg)](https://registry.hub.docker.com/u/sequenceiq/hadoop-docker/) +# Apache Hadoop 2.7.1 in a Docker container +Hadoop in a pseudo distributed mode in a Docker container, forked from [sequenceiq](https://github.com/sequenceiq/hadoop-docker). The added values are: +- Bug [fix](https://github.com/sequenceiq/hadoop-docker/pull/75) +- Use openjdk +- Tez installation -_Note: this is the master branch - for a particular Hadoop version always check the related branch_ - -A few weeks ago we released an Apache Hadoop 2.3 Docker image - this quickly become the most [popular](https://registry.hub.docker.com/search?q=hadoop&s=downloads) Hadoop image in the Docker [registry](https://registry.hub.docker.com/). 
- - -Following the success of our previous Hadoop Docker [images](https://registry.hub.docker.com/u/sequenceiq/hadoop-docker/), the feedback and feature requests we received, we aligned with the Hadoop release cycle, so we have released an Apache Hadoop 2.7.1 Docker image - same as the previous version, it's available as a trusted and automated build on the official Docker [registry](https://registry.hub.docker.com/). - - -_FYI: All the former Hadoop releases (2.3, 2.4.0, 2.4.1, 2.5.0, 2.5.1, 2.5.2, 2.6.0) are available in the GitHub branches or our [Docker Registry](https://registry.hub.docker.com/u/sequenceiq/hadoop-docker/) - check the tags._ - -# Build the image - -If you'd like to try directly from the Dockerfile you can build the image as: - -``` -docker build -t sequenceiq/hadoop-docker:2.7.1 . -``` -# Pull the image - -The image is also released as an official Docker image from Docker's automated build repository - you can always pull or refer the image when launching containers. - -``` -docker pull sequenceiq/hadoop-docker:2.7.1 -``` - -# Start a container - -In order to use the Docker image you have just build or pulled use: +## Start a container **Make sure that SELinux is disabled on the host. 
If you are using boot2docker you don't need to do anything.** ``` -docker run -it sequenceiq/hadoop-docker:2.7.1 /etc/bootstrap.sh -bash +docker run -P -it ouyi/hadoop-docker /etc/bootstrap.sh -bash ``` ## Testing @@ -50,13 +27,5 @@ cd $HADOOP_PREFIX bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+' # check the output -bin/hdfs dfs -cat output/* +bin/hadoop fs -cat output/{*} ``` - -## Hadoop native libraries, build, Bintray, etc - -The Hadoop build process is no easy task - requires lots of libraries and their right version, protobuf, etc and takes some time - we have simplified all these, made the build and released a 64b version of Hadoop nativelibs on this [Bintray repo](https://bintray.com/sequenceiq/sequenceiq-bin/hadoop-native-64bit/2.7.0/view/files). Enjoy. - -## Automate everything - -As we have mentioned previousely, a Docker file was created and released in the official [Docker repository](https://registry.hub.docker.com/u/sequenceiq/hadoop-docker/) diff --git a/bootstrap.sh b/bootstrap.sh index 4cf0e55..02821bd 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -18,6 +18,15 @@ $HADOOP_PREFIX/sbin/start-dfs.sh $HADOOP_PREFIX/sbin/start-yarn.sh $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver +# install Tez: publish the tarball to HDFS (-f overwrites a copy left by a previous start) +export PATH=$HADOOP_PREFIX/sbin:$HADOOP_PREFIX/bin:$PATH +hdfs dfsadmin -safemode wait +hadoop fs -mkdir -p /apps/tez +hadoop fs -copyFromLocal -f /root/tez/apache-tez-0.8.5-bin/share/tez.tar.gz /apps/apache-tez-0.8.5-bin.tar.gz +export TEZ_CONF_DIR=/usr/local/hadoop/etc/hadoop/ +export TEZ_JARS=/root/tez/apache-tez-0.8.5-bin +export HADOOP_CLASSPATH=${TEZ_CONF_DIR}:${TEZ_JARS}/*:${TEZ_JARS}/lib/* + if [[ $1 == "-d" ]]; then while true; do sleep 1000; done fi diff --git a/java_env.sh b/java_env.sh new file mode 100644 index 0000000..65410e1 --- /dev/null +++ b/java_env.sh @@ -0,0 +1,2 @@ +export JAVA_HOME=/etc/alternatives/java_sdk 
+export PATH=${JAVA_HOME}/bin:${PATH} diff --git 
a/mapred-site.xml b/mapred-site.xml index dba582f..e3371b3 100644 --- a/mapred-site.xml +++ b/mapred-site.xml @@ -3,4 +3,20 @@ mapreduce.framework.name yarn + + mapreduce.map.memory.mb + 256 + + + mapreduce.reduce.memory.mb + 512 + + + mapreduce.map.java.opts + -Xmx150m + + + mapreduce.reduce.java.opts + -Xmx300m + diff --git a/tez-site.xml b/tez-site.xml new file mode 100644 index 0000000..116d389 --- /dev/null +++ b/tez-site.xml @@ -0,0 +1,1151 @@ + + + + + + + + + + tez.dag.recovery.enabled + true + Boolean value. Enable recovery of DAGs. This allows a restarted app master to recover the + incomplete DAGs from the previous instance of the app master. + boolean + + + + tez.dag.recovery.io.buffer.size + 8192 + Int value. Size in bytes for the IO buffer size while processing the recovery file. + Expert level setting. + integer + + + + tez.dag.recovery.flush.interval.secs + 30 + Int value. Interval, in seconds, between flushing recovery data to the recovery log. + integer + + + + tez.dag.recovery.max.unflushed.events + 100 + Int value. Number of recovery events to buffer before flushing them to the recovery log. + integer + + + + tez.task.heartbeat.timeout.check-ms + 30000 + Int value. Time interval, in milliseconds, between checks for lost tasks. + Expert level setting. + integer + + + + tez.task.timeout-ms + 300000 + Int value. Time interval, in milliseconds, within which a task must heartbeat to the app master + before its considered lost. + Expert level setting. + integer + + + + tez.am.acls.enabled + true + Boolean value. Configuration to enable/disable ACL checks. + boolean + + + + tez.allow.disabled.timeline-domains + false + Boolean value. + Allow disabling of Timeline Domains even if Timeline is being used. + boolean + true + + + + tez.am.client.am.port-range + String value. Range of ports that the AM can use when binding for client connections. Leave blank + to use all possible ports. Expert level setting. It's hadoop standard range configuration. 
+ For example 50000-50050,50100-50200 + string + + + + tez.am.client.am.thread-count + 1 + Int value. Number of threads to handle client RPC requests. Expert level setting. + integer + + + + tez.am.commit-all-outputs-on-dag-success + true + Boolean value. Determines when the final outputs to data sinks are committed. Commit is an + output specific operation and typically involves making the output visible for consumption. + If the config is true, then the outputs are committed at the end of DAG completion after all + constituent vertices have completed. If false, outputs for each vertex are committed after that + vertex succeeds. Depending on the desired output visibility and downstream consumer dependencies + this value must be appropriately chosen. Defaults to the safe choice of true. + boolean + + + + tez.am.containerlauncher.thread-count-limit + 500 + Int value. Upper limit on the number of threads user to launch containers in the app + master. Expert level setting. + integer + + + + tez.am.container.idle.release-timeout-max.millis + 10000 + Int value. The maximum amount of time to hold on to a container if no task can be + assigned to it immediately. Only active when reuse is enabled. The value + must be +ve and >= + TezConfiguration#TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS. + Containers will have an expire time set to a random value between + TezConfiguration#TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MIN_MILLIS && + TezConfiguration#TEZ_AM_CONTAINER_IDLE_RELEASE_TIMEOUT_MAX_MILLIS. This + creates a graceful reduction in the amount of idle resources held + long + + + + tez.am.container.idle.release-timeout-min.millis + 5000 + Int value. The minimum amount of time to hold on to a container that is idle. Only active when + reuse is enabled. Set to -1 to never release idle containers (not recommended). + integer + + + + tez.am.container.reuse.enabled + true + Boolean value. Configuration to specify whether container should be reused across tasks. 
+ This improves performance by not incurring recurring launch overheads. + boolean + + + + tez.am.container.reuse.locality.delay-allocation-millis + 250 + Int value. The amount of time to wait before assigning a container to the next level + of locality. NODE -> RACK -> NON_LOCAL. Delay scheduling parameter. Expert level setting. + long + + + + tez.am.container.reuse.non-local-fallback.enabled + false + Boolean value. Whether to reuse containers for non-local tasks. Active only if reuse is + enabled. Turning this on can severely affect locality and can be bad for jobs with high data + volume being read from the primary data sources. + boolean + + + + tez.am.container.reuse.rack-fallback.enabled + true + Boolean value. Whether to reuse containers for rack local tasks. Active only if reuse is + enabled. + boolean + + + + tez.am.credentials-merge + Boolean value. If true then Tez will add the ApplicationMaster credentials + to all task credentials. + boolean + + + + tez.am.dag.scheduler.class + org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrder + String value. The class to be used for DAG Scheduling. Expert level setting. + string + + + + tez.am.disable.client-version-check + false + Boolean value. + Disable version check between client and AM/DAG. Default false. + boolean + true + + + + tez.am.inline.task.execution.enabled + false + Tez AM Inline Mode flag. Not valid till Tez-684 get checked-in + boolean + true + + + + tez.am.inline.task.execution.max-tasks + 1 + Int value. + The maximium number of tasks running in parallel within the app master process. + integer + + + + tez.am.launch.cluster-default.cmd-opts + -server -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN + String value. Command line options which will be prepended to {@link #TEZ_AM_LAUNCH_CMD_OPTS} + during the launch of the AppMaster process. This property will typically be configured to + include default options meant to be used by all jobs in a cluster. 
If required, the values can + be overridden per job. + string + + + + tez.am.launch.cluster-default.env + String value. Env settings will be merged with {@link #TEZ_AM_LAUNCH_ENV} + during the launch of the AppMaster process. This property will typically be configured to + include default system env meant to be used by all jobs in a cluster. If required, the values can + be appended to per job. + string + + + + tez.am.launch.cmd-opts + -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseParallelGC + String value. Command line options provided during the launch of the Tez + AppMaster process. Its recommended to not set any Xmx or Xms in these launch opts so that + Tez can determine them automatically. + string + + + + tez.am.launch.env + String value. Env settings for the Tez AppMaster process. + Should be specified as a comma-separated of key-value pairs where each pair + is defined as KEY=VAL + e.g. "LD_LIBRARY_PATH=.,USERNAME=foo" + These take least precedence compared to other methods of setting env. + These get added to the app master environment prior to launching it. + This setting will prepend existing settings in the cluster default + string + + + + tez.am.legacy.speculative.slowtask.threshold + Float value. Specifies how many standard deviations away from the mean task execution time + should be considered as an outlier/slow task. + float + true + + + + tez.am.log.level + INFO + Root Logging level passed to the Tez app master. + + Simple configuration: Set the log level for all loggers. + e.g. INFO + This sets the log level to INFO for all loggers. + + Advanced configuration: Set the log level for all classes, along with a different level for some. + e.g. DEBUG;org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO + This sets the log level for all loggers to DEBUG, expect for the + org.apache.hadoop.ipc and org.apache.hadoop.security, which are set to INFO + + Note: The global log level must always be the first parameter. 
+ DEBUG;org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO is valid + org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO is not valid + string + + + + tez.am.max.allowed.time-sec.for-read-error + 300 + int value. Represents the maximum time in seconds for which a consumer attempt can report + a read error against its producer attempt, after which the producer attempt will be re-run + to re-generate the output. There are other heuristics which determine the retry and mainly + try to guard against a flurry of re-runs due to intermittent read errors + (due to network issues). This configuration puts a time limit on those heuristics to ensure + jobs don't hang indefinitely due to lack of closure in those heuristics + + Expert level setting. + integer + + + + tez.am.max.app.attempts + 2 + Int value. Specifies the number of times the app master can be launched in order to recover + from app master failure. Typically app master failures are non-recoverable. This parameter + is for cases where the app master is not at fault but is lost due to system errors. + Expert level setting. + integer + + + + tez.am.maxtaskfailures.per.node + 10 + Int value. Specifies the number of task failures on a node before the node is considered faulty. + integer + + + + tez.am.modify-acls + String value. + AM modify ACLs. This allows the specified users/groups to run modify operations on the AM + such as submitting DAGs, pre-warming the session, killing DAGs or shutting down the session. + Comma separated list of users, followed by whitespace, followed by a comma separated list of + groups + string + + + + tez.am.node-blacklisting.enabled + true + Boolean value. Enables blacklisting of nodes that are considered faulty. These nodes + will not be used to execute tasks. + boolean + + + + tez.am.node-blacklisting.ignore-threshold-node-percent + 33 + Int value. Specifies the percentage of nodes in the cluster that may be considered faulty. 
+ This limits the number of nodes that are blacklisted in an effort to minimize the effects of + temporary surges in failures (e.g. due to network outages). + integer + + + + tez.am.node-unhealthy-reschedule-tasks + false + Boolean value. Enable task rescheduling for node updates. + When enabled the task scheduler will reschedule task attempts that + are associated with an unhealthy node to avoid potential data transfer + errors from downstream tasks. + boolean + + + + tez.am.preemption.heartbeats-between-preemptions + 3 + Int value. The number of RM heartbeats to wait after preempting running tasks before preempting + more running tasks. After preempting a task, we need to wait at least 1 heartbeat so that the + RM can act on the released resources and assign new ones to us. Expert level setting. + integer + + + + tez.am.preemption.max.wait-time-ms + 60000 + Int value. Time (in millisecs) that an unsatisfied request will wait before preempting other + resources. In rare cases, the cluster says there are enough free resources but does not end + up getting enough on a node to actually assign it to the job. This configuration tries to put + a deadline on such wait to prevent indefinite job hangs. + integer + + + + tez.am.preemption.percentage + 10 + Int value. Specifies the percentage of tasks eligible to be preempted that + will actually be preempted in a given round of Tez internal preemption. + This slows down preemption and gives more time for free resources to be + allocated by the cluster (if any) and gives more time for preemptable tasks + to finish. Valid values are 0-100. Higher values will preempt quickly at + the cost of losing work. Setting to 0 turns off preemption. Expert level + setting. + integer + + + + tez.am.resource.cpu.vcores + 1 + Int value. The number of virtual cores to be used by the app master + integer + + + + tez.am.resource.memory.mb + 1024 + Int value. 
The amount of memory in MB to be used by the AppMaster + integer + + + + tez.am.am-rm.heartbeat.interval-ms.max + 1000 + Int value. The maximum heartbeat interval between the AM and RM in milliseconds + Increasing this reduces the communication between the AM and the RM and can + help in scaling up. Expert level setting. + integer + + + + tez.am.session.min.held-containers + 0 + Int value. The minimum number of containers that will be held in session mode. Not active in + non-session mode. Enables an idle session (not running any DAG) to hold on to a minimum number + of containers to provide fast response times for the next DAG. + integer + + + + tez.am.mode.session + false + Boolean value. Execution mode for the Tez application. True implies session mode. If the client + code is written according to best practices then the same code can execute in either mode based + on this configuration. Session mode is more aggressive in reserving execution resources and is + typically used for interactive applications where multiple DAGs are submitted in quick succession + by the same user. For long running applications, one-off executions, batch jobs etc non-session + mode is recommended. If session mode is enabled then container reuse is recommended. + boolean + + + + tez.am.speculation.enabled + false + boolean + true + + + + tez.staging-dir + String value. Specifies a directory where Tez can create temporary job artifacts. + string + + + + tez.am.staging.scratch-data.auto-delete + true + Boolean value. If true then Tez will try to automatically delete temporary job + artifacts that it creates within the specified staging dir. Does not affect any user data. + boolean + + + + tez.am.task.listener.thread-count + 30 + Int value. The number of threads used to listen to task heartbeat requests. + Expert level setting. + integer + + + + tez.am.task.max.failed.attempts + 4 + Int value. The maximum number of attempts that can fail for a particular task before the task is failed. 
+ This does not count killed attempts. Task failure results in DAG failure. + integer + + + + tez.am.tez-ui.history-url.template + __HISTORY_URL_BASE__/#/tez-app/__APPLICATION_ID__ + String value + Tez UI URL template for the application. + Expert level setting. + + The AM will redirect the user to the Tez UI via this url. Template supports the following + parameters to be replaced with the actual runtime information: + + __APPLICATION_ID__ : Replaces this with application ID + __HISTORY_URL_BASE__: replaces this with TEZ_HISTORY_URL_BASE + + For example, "http://uihost:9001/#/tez-app/__APPLICATION_ID__/ will be replaced to + http://uihost:9001/#/tez-app/application_1421880306565_0001/ + string + + + + tez.am.vertex.max-task-concurrency + -1 + Int value. The maximum number of attempts that can run concurrently for a given vertex. + Setting <=0 implies no limit + integer + + + + tez.am.view-acls + String value. + AM view ACLs. This allows the specified users/groups to view the status of the AM and all DAGs + that run within this AM. + Comma separated list of users, followed by whitespace, followed by a comma separated list of + groups + string + + + + tez.am.tez-ui.webservice.enable + true + String value + Allow disabling of the Tez AM webservice. If set to false the Tez-UI wont show progress + updates for running application. + boolean + + + + tez.application.tags + String value. Tags for the job that will be passed to YARN at submission + time. Queries to YARN for applications can filter on these tags. + string + + + + tez.aux.uris + Auxiliary resources to be localized for the Tez AM and all its containers. + + Value is comma-separated list of fully-resolved directories or file paths. All resources + are made available into the working directory of the AM and/or containers i.e. $CWD. + + If directories are specified, they are not traversed recursively. Only files directly under the + specified directory are localized. + + All duplicate resources are ignored. 
+ string + ${fs.defaultFS}/apps/tez + + + + tez.cancel.delegation.tokens.on.completion + true + boolean + true + + + + tez.client.asynchronous-stop + true + Boolean value. Backwards compatibility setting. Changes TezClient stop to be a + synchronous call waiting until AM is in a final state before returning to the user. + Expert level setting. + boolean + + + + tez.client.diagnostics.wait.timeout-ms + 3000 + Long value + Time to wait (in milliseconds) for yarn app's diagnotics is available + Workaround for YARN-2560 + long + true + + + + tez.client.timeout-ms + 30000 + Long value. Time interval, in milliseconds, for client to wait during client-requested + AM shutdown before issuing a hard kill to the RM for this application. + Expert level setting. + long + + + + tez.java.opts.checker.class + String value. + Ability to provide a different implementation to check/verify java opts defined + for vertices/tasks. + Class has to be an instance of JavaOptsChecker + string + true + + + + tez.java.opts.checker.enabled + true + Boolean value. Default true. + Ability to disable the Java Opts Checker + boolean + true + + + + tez.container.max.java.heap.fraction + 0.8 + Double value. Tez automatically determines the Xmx for the JVMs used to run + Tez tasks and app masters. This feature is enabled if the user has not + specified Xmx or Xms values in the launch command opts. Doing automatic Xmx + calculation is preferred because Tez can determine the best value based on + actual allocation of memory to tasks the cluster. The value if used as a + fraction that is applied to the memory allocated Factor to size Xmx based + on container memory size. Value should be greater than 0 and less than 1. + float + + + + tez.counters.counter-name.max-length + 64 + Int value. Configuration to limit the length of counter names. This can be used to + limit the amount of memory being used in the app master to store the + counters. Expert level setting. 
+ integer + true + + + + tez.counters.group-name.max-length + 256 + Int value. Configuration to limit the counter group names per app master. This can be used to + limit the amount of memory being used in the app master to store the + counters. Expert level setting. + integer + true + + + + tez.counters.max + 1200 + Int value. Configuration to limit the counters per dag (AppMaster and Task). This can be used + to + limit the amount of memory being used in the app master to store the + counters. Expert level setting. + integer + true + + + + tez.counters.max.groups + 500 + Int value. Configuration to limit the number of counter groups for a DAG. This can be used to + limit the amount of memory being used in the app master to store the + counters. Expert level setting. + integer + true + + + + tez.credentials.path + String value that is a file path. + Path to a credentials file (with serialized credentials) located on the local file system. + string + + + + tez.dag.status.pollinterval-ms + 500 + Long value + Status Poll interval in Milliseconds used when getting DAG status with timeout. + long + + + + tez.generate.debug.artifacts + false + boolean + true + + + + tez.history.logging.log.level + Enum value. Config to limit the type of events published to the history logging service. + The valid log levels are defined in the enum {@link HistoryLogLevel}. The default value is + defined in {@link HistoryLogLevel#DEFAULT}. + string + + + + tez.history.logging.service.class + org.apache.tez.dag.history.logging.impl.SimpleHistoryLoggingService + String value that is a class name. + Specify the class to use for logging history data. + To disable, set this to "org.apache.tez.dag.history.logging.impl.DevNullHistoryLoggingService" + string + + + + tez.history.logging.taskattempt-filters + List of comma separated enum values. Specifies the list of task attempt termination causes, + which have to be suppressed from being logged to ATS. 
The valid filters are defined in the + enum TaskAttemptTerminationCause. The filters are applied only if tez.history.logging.log.level + is set to TASK_ATTEMPT. + string + + + + tez.history.logging.timeline-cache-plugin.old-num-dags-per-group + Comma separated list of Integers. These are the values that were set for the config value + for {@value #TEZ_HISTORY_LOGGING_TIMELINE_NUM_DAGS_PER_GROUP}. The older values are required so + that the groupIds generated previously will continue to be generated by the plugin. If an older + value is not present then the UI may not show information for DAGs which were created + with a different grouping value. + + Note: Do not add too many values here as it will affect the performance of Yarn Timeline + Server/Tez UI due to the need to scan for more log files. + string + true + true + + + + tez.history.logging.timeline.num-dags-per-group + 1 + Integer value. Number of DAGs to be grouped together. This is used by the history logging + service to generate groupIds such that numDagsPerGroup will have same groupId in a given + session. If the value is set to 1 then we disable grouping. This config is used to control the + number of DAGs written into one log file, and hence controls number of files created in + the Filesystem used by YARN Timeline. + integer + true + true + + + + tez.tez-ui.history-url.base + String value + Tez-UI Url base. This gets replaced in the TEZ_AM_TEZ_UI_HISTORY_URL_TEMPLATE + ex http://ui-host:9001 or if its hosted with a prefix http://ui-host:9001/~user + if the ui is hosted on the default port (80 for http and 443 for https), the port should not + be specified. + string + + + + tez.ignore.lib.uris + Boolean value. Allows to ignore 'tez.lib.uris'. Useful during development as well as + raw Tez application where classpath is propagated with application + via {@link LocalResource}s. This is mainly useful for developer/debugger scenarios. 
+ boolean + true + + + + tez.ipc.payload.reserved.bytes + 5242880 + Int value. SubmitDAGPlanRequest cannot be larger than Max IPC message size minus this number; otherwise, it will + be serialized to HDFS and we transfer the path to server. Server will deserialize the request from HDFS. + int + true + + + + tez.tez.jvm.system-properties-to-log + String value. Determines what JVM properties will be logged for debugging purposes + in the AM and Task runtime logs. + string + + + + tez.lib.uris + String value to a file path. + The location of the Tez libraries which will be localized for DAGs. + This follows the following semantics + <ol> + <li> To use .tar.gz or .tgz files (generated by the tez or hadoop builds), the full path to this + file (including filename) should be specified. The internal structure of the uncompressed tgz + will be defined by 'tez.lib.uris.classpath'</li> + + <li> If a single file is specified without the above mentioned extensions - it will be treated as + a regular file. This means it will not be uncompressed during runtime. </li> + + <li> If multiple entries exist + <ul> + <li> Regular Files: will be treated as regular files (not uncompressed during runtime) </li> + <li> Archive Files: will be treated as archives and will be uncompressed during runtime </li> + <li> Directories: all files under the directory (non-recursive) will be made available (but not + uncompressed during runtime). </li> + </ul> + </ol> + string + ${fs.defaultFS}/apps/apache-tez-0.8.5-bin.tar.gz + + + + tez.lib.uris.classpath + Specify additional user classpath information to be used for Tez AM and all containers. + This will be appended to the classpath after PWD + + 'tez.lib.uris.classpath' defines the relative classpath into the archives + that are set in 'tez.lib.uris' + string + + + + tez.local.mode + false + Boolean value. Enable local mode execution in Tez. Enables tasks to run in the same process as + the app master. Primarily used for debugging. 
+ boolean + + + + tez.queue.name + String value. The queue name for all jobs being submitted from a given client. + string + + + + tez.session.am.dag.submit.timeout.secs + 300 + Int value. Time (in seconds) for which the Tez AM should wait for a DAG to be submitted before + shutting down. Only relevant in session mode. Any negative value will disable this check and + allow the AM to hang around forever in idle mode. + integer + + + + tez.session.client.timeout.secs + 120 + Int value. Time (in seconds) to wait for AM to come up when trying to submit a DAG + from the client. Only relevant in session mode. If the cluster is busy and cannot launch the + AM then this timeout may be hit. In those cases, using non-session mode is recommended if + applicable. Otherwise increase the timeout (set to -1 for infinity. Not recommended) + integer + + + + tez.simple.history.logging.dir + String value. The directory into which history data will be written. This defaults to the + container logging directory. This is relevant only when SimpleHistoryLoggingService is being + used for {@link TezConfiguration#TEZ_HISTORY_LOGGING_SERVICE_CLASS} + string + + + + tez.simple.history.max.errors + 10 + Int value. Maximum errors allowed while logging history data. After crossing this limit history + logging gets disabled. The job continues to run after this. + integer + + + + tez.task.am.heartbeat.counter.interval-ms.max + 4000 + Int value. Interval, in milliseconds, after which counters are sent to AM in heartbeat from + tasks. This reduces the amount of network traffic between AM and tasks to send high-volume + counters. Improves AM scalability. Expert level setting. + integer + + + + tez.task.am.heartbeat.interval-ms.max + 100 + Int value. The maximum heartbeat interval, in milliseconds, between the app master and tasks. + Increasing this can help improve app master scalability for a large number of concurrent tasks. + Expert level setting. 
+ integer + + + + tez.task.generate.counters.per.io + false + Whether to generate counters per IO or not. Enabling this will rename + CounterGroups / CounterNames to making them unique per Vertex + + Src|Destination + boolean + true + true + + + + tez.task.get-task.sleep.interval-ms.max + 200 + Int value. The maximum amount of time, in milliseconds, to wait before a task asks an + AM for another task. Increasing this can help improve app master scalability for a large + number of concurrent tasks. Expert level setting. + integer + + + + tez.task.initialize-processor-first + false + Boolean value. Backwards compatibility setting for initializing IO processor before + inputs and outputs. + Expert level setting. + boolean + + + + tez.task.initialize-processor-io-serially + false + Boolean value. Backwards compatibility setting for initializing inputs and outputs + serially instead of the parallel default. + Expert level setting. + boolean + + + + tez.task.launch.cluster-default.cmd-opts + -server -Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN + String value. Command line options which will be prepended to {@link + #TEZ_TASK_LAUNCH_CMD_OPTS} during the launch of Tez tasks. This property will typically be configured to + include default options meant to be used by all jobs in a cluster. If required, the values can + be overridden per job. + string + + + + tez.task.launch.cluster-default.env + String value. Env settings will be merged with {@link #TEZ_TASK_LAUNCH_ENV} + during the launch of the task process. This property will typically be configured to + include default system env meant to be used by all jobs in a cluster. If required, the values can + be appended to per job. + string + + + + tez.task.launch.cmd-opts + -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseParallelGC + String value. Command line options provided during the launch of Tez Task + processes. 
It's recommended to not set any Xmx or Xms in these launch opts + so that Tez can determine them automatically. + string + + + + tez.task.launch.env + String value. Env settings for the Tez Task processes. + Should be specified as a comma-separated list of key-value pairs where each pair + is defined as KEY=VAL + e.g. "LD_LIBRARY_PATH=.,USERNAME=foo" + These take least precedence compared to other methods of setting env + These get added to the task environment prior to launching it. + This setting will prepend existing settings in the cluster default + string + + + + tez.task.log.level + INFO + Root Logging level passed to the Tez tasks. + + Simple configuration: Set the log level for all loggers. + e.g. INFO + This sets the log level to INFO for all loggers. + + Advanced configuration: Set the log level for all classes, along with a different level for some. + e.g. DEBUG;org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO + This sets the log level for all loggers to DEBUG, except for the + org.apache.hadoop.ipc and org.apache.hadoop.security, which are set to INFO + + Note: The global log level must always be the first parameter. + DEBUG;org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO is valid + org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO is not valid + string + + + + tez.task.max-events-per-heartbeat + 500 + Int value. Maximum number of events to fetch from the AM by the tasks in a single heartbeat. + Expert level setting. + integer + + + + tez.task.max-event-backlog + 10000 + Int value. Maximum number of pending task events before a task will stop + asking for more events in the task heartbeat. + Expert level setting. + integer + + + + tez.task.progress.stuck.interval-ms + -1 + Long value. Interval, in milliseconds, within which any of the tasks Input/Processor/Output + components need to make successive progress notifications.
If the progress is not notified + for this interval then the task will be considered hung and terminated. + The value for this config should be larger than {@link TezConfiguration#TASK_HEARTBEAT_TIMEOUT_MS} + and larger than 2 times the value of {@link TezConfiguration#TEZ_TASK_AM_HEARTBEAT_INTERVAL_MS}. + A config value <=0 disables this. + string + + + + tez.task.resource.calculator.process-tree.class + string + true + true + + + + tez.task.resource.cpu.vcores + 1 + Int value. The number of virtual cores to be used by tasks. + integer + + + + tez.task.resource.memory.mb + 1024 + Int value. The amount of memory in MB to be used by tasks. This applies to all tasks across + all vertices. Setting it to the same value for all tasks is helpful for container reuse and + thus good for performance typically. + integer + + + + tez.task.scale.memory.additional-reservation.fraction.max + float + true + true + + + + tez.task.scale.memory.additional-reservation.fraction.per-io + Fraction of available memory to reserve per input/output. This amount is + removed from the total available pool before allocation and is for factoring in overheads. + float + true + true + + + + tez.task.scale.memory.allocator.class + org.apache.tez.runtime.library.resources.WeightedScalingMemoryDistributor + The allocator to use for initial memory allocation + string + true + true + + + + tez.task.scale.memory.enabled + true + Whether to scale down memory requested by each component if the total + exceeds the available JVM memory + boolean + true + true + + + + tez.task.scale.memory.reserve-fraction + 0.3 + The fraction of the JVM memory which will not be considered for allocation. + No defaults, since there are pre-existing defaults based on different scenarios. + double + true + true + + + + tez.task.scale.memory.ratios + string + true + true + + + + tez.task-specific.launch.cmd-opts + Additional launch command options to be added for specific tasks. 
+ __VERTEX_NAME__ and __TASK_INDEX__ can be specified, which would be replaced at + runtime by vertex name and task index. + e.g. tez.task-specific.launch.cmd-opts= + "-agentpath:libpagent.so,dir=/tmp/__VERTEX_NAME__/__TASK_INDEX__" + string + true + + + + tez.task-specific.launch.cmd-opts.list + Set of tasks for which specific launch command options need to be added. + Format: "vertexName[csv of task ids];vertexName[csv of task ids].." + Valid e.g: + v[0,1,2] - Additional launch-cmd options for tasks 0,1,2 of vertex v + v[1,2,3];v2[5,6,7] - Additional launch-cmd options specified for tasks of vertices v and v2. + v[1:5,20,30];v2[2:5,60,7] - Additional launch-cmd options for 1,2,3,4,5,20,30 of vertex v; 2, + 3,4,5,60,7 of vertex v2 + Partial ranges like :5, 1: are not supported. + v[] - Additional launch-cmd options for all tasks in vertex v + string + true + + + + tez.task-specific.log.level + Task specific log level. + + Simple configuration: Set the log level for all loggers. + e.g. INFO + This sets the log level to INFO for all loggers. + + Advanced configuration: Set the log level for all classes, along with a different level for some. + e.g. DEBUG;org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO + This sets the log level for all loggers to DEBUG, except for the + org.apache.hadoop.ipc and org.apache.hadoop.security, which are set to INFO + + Note: The global log level must always be the first parameter. + DEBUG;org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO is valid + org.apache.hadoop.ipc=INFO;org.apache.hadoop.security=INFO is not valid + string + true + + + + tez.test.minicluster.app.wait.on.shutdown.secs + 30 + Long value. + Time to wait (in seconds) for apps to complete on MiniTezCluster shutdown. + long + true + + + + tez.use.cluster.hadoop-libs + false + Boolean value. + Specify whether hadoop libraries required to run Tez should be the ones deployed on the cluster.
+ This is disabled by default - with the expectation being that tez.lib.uris has a complete + tez-deployment which contains the hadoop libraries. + boolean + + + + tez.yarn.ats.acl.domains.auto-create + true + boolean + + + + tez.yarn.ats.event.flush.timeout.millis + -1 + Int value. Time, in milliseconds, to wait while flushing YARN ATS data during shutdown. + Expert level setting. + long + + + + tez.yarn.ats.max.events.per.batch + 5 + Int value. Max no. of events to send in a single batch to ATS. + Expert level setting. + integer + + + + tez.yarn.ats.max.polling.time.per.event.millis + 10 + Int value. Time, in milliseconds, to wait for an event before sending a batch to ATS. + Expert level setting. + integer + + + diff --git a/yarn-site.xml b/yarn-site.xml index 7593388..d9de36e 100644 --- a/yarn-site.xml +++ b/yarn-site.xml @@ -29,4 +29,25 @@ 600 + +yarn.nodemanager.resource.memory-mb +2048 + + + + yarn.scheduler.minimum-allocation-mb + 128 + + + + yarn.nodemanager.vmem-pmem-ratio + 1.5 + + + + yarn.nodemanager.vmem-check-enabled + false + Whether virtual memory limits will be enforced for containers + +