#!/bin/bash
set -x -e
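
# EMR bootstrap action: installs Miniconda, Jupyter (with notebooks stored in S3
# via s3contents) and a JuSpark kernel configured for Hail / variant-spark on the
# master node of the cluster.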

INPUT_PATH=""
HAIL_VERSION="0.1"
SPARK_VERSION="2.2.1"
IS_MASTER=false

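# Bootstrap actions run on every node; only the master should start Jupyter.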
if grep isMaster /mnt/var/lib/info/instance.json | grep -q true; then
  IS_MASTER=true
fi

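# Parse the bootstrap-action arguments supplied in the cluster definition.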
while [ $# -gt 0 ]; do
  case "$1" in
    --input-path)
      shift
      INPUT_PATH=$1
      ;;
    --hail-version)
      shift
      HAIL_VERSION=$1
      ;;
    --spark-version)
      shift
      SPARK_VERSION=$1
      ;;
    --notebookPath)
      shift
      NotebookPath=$1
      ;;
    -*)
      echo "unrecognized option: $1" >&2
      exit 1
      ;;
    *)
      break
      ;;
  esac
  shift
done

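# Split the s3://bucket/prefix notebook path into its bucket and key prefix
# (substr(XX,6) drops the leading "s3://"; the prefix starts after "s3://<bucket>/").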
BUCKET=$(awk -v XX="$NotebookPath" 'BEGIN{x=substr(XX,6); split(x,a,"/"); print(a[1])}')
PREFIX=$(awk -v XX="$NotebookPath" -v YY="$BUCKET" 'BEGIN{y=length(YY); print(substr(XX,7+y));}')

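# Copy the example notebook into the notebook folder on S3.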
aws s3 cp s3://variant-spark/HailJupyter/VariantSpark_example_with_Hail_library.ipynb VariantSpark_example_with_Hail_library.ipynb
aws s3 cp VariantSpark_example_with_Hail_library.ipynb $NotebookPath/

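# Define an Upstart job (via the spantree-upstart Puppet module installed below)
# that keeps the Jupyter server running as the hadoop user and logs to jupyter.log.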
upstart_jupyter() {
  sudo puppet apply << PUPPET_SCRIPT
  include 'upstart'
  upstart::job { 'jupyter':
    description => 'Jupyter',
    respawn => true,
    respawn_limit => '0 10',
    start_on => 'runlevel [2345]',
    stop_on => 'runlevel [016]',
    console => 'output',
    chdir => '/home/hadoop',
    script => '
      sudo su - hadoop > /home/hadoop/jupyter.log 2>&1 <<BASH_SCRIPT
export SPARK_HOME=/usr/lib/spark
export PYTHONPATH=$PYTHONPATH:/home/hadoop/hail-python.zip
/home/hadoop/miniconda2/envs/jupyter/bin/jupyter notebook
BASH_SCRIPT
    ',
  }
PUPPET_SCRIPT
}

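# Everything below only runs on the master node.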
if [ "$IS_MASTER" = true ]; then
  # Install miniconda
  wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
  bash Miniconda2-latest-Linux-x86_64.sh -b
  export PATH=~/miniconda2/bin:$PATH
  conda create -y -n jupyter python=2.7
  source activate jupyter
  # Install other packages
  # TODO: make these configurable
  pip install --upgrade matplotlib pandas click variant-spark
  # Install jupyter components
  pip install --upgrade jupyter==1.0.0 s3contents==0.1.4 decorator==4.2.1 notebook==5.7.0 juspark
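  # Configure Jupyter to store notebooks in S3 via s3contents, using the bucket
  # and prefix derived from --notebookPath.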
  mkdir -p ~/.jupyter
  cat >> ~/.jupyter/jupyter_notebook_config.py << EOF
# S3ContentsManager
from s3contents import S3ContentsManager
c.NotebookApp.contents_manager_class = S3ContentsManager
c.S3ContentsManager.bucket_name = "$BUCKET"
c.S3ContentsManager.prefix = "$PREFIX"
EOF

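  # NOTE: this disables token/password authentication and listens on all
  # interfaces; access should be limited by the cluster's security groups.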
  cat >> ~/.jupyter/jupyter_notebook_config.py << EOF
c.NotebookApp.token = ''
c.NotebookApp.password = ''
c.NotebookApp.ip = '*'
c.NotebookApp.open_browser = False
c.NotebookApp.allow_remote_access = True
EOF

  # Setup JuSpark kernel

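  # The juspark extension loaded by this kernel reads Spark settings from
  # profiles under ~/.juspark/profiles; the "hail" profile below adds the
  # Hail jar and Python zip to the Spark session.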
  mkdir -p ~/.local/share/jupyter/kernels/juspark
  cat > ~/.local/share/jupyter/kernels/juspark/kernel.json << EOF
{
  "display_name": "JuSpark",
  "language": "python",
  "argv": [
    "/home/hadoop/miniconda2/envs/jupyter/bin/python",
    "-m",
    "ipykernel",
    "-f",
    "{connection_file}",
    "--ext=juspark"
  ]
}
EOF

  # Setup profiles for juspark
  mkdir -p ~/.juspark/profiles
  cat > ~/.juspark/profiles/hail << EOF
{
  "spark.jars":"/home/hadoop/hail-all-spark.jar",
  "spark.submit.pyFiles":"/home/hadoop/hail-python.zip"
}
EOF

  # Install puppet modules
  sudo puppet module install spantree-upstart

  # Setup daemons
  upstart_jupyter

fi