Re-organized Low Level Tutorial

srcarrel · srcarrel · commit 7ba44a52fe7f · 2018-08-10T12:08:06.000-04:00
diff --git a/Tutorials/Boston Housing - XGBoost - Low Level.ipynb b/Tutorials/Boston Housing - XGBoost - Low Level.ipynb
@@ -205,78 +205,77 @@
     "container = get_image_uri(session.boto_region_name, 'xgboost')\n",
     "\n",
     "# We now specify the parameters we wish to use for our training job\n",
-    "training_params = \\\n",
-    "{\n",
-    "    # We need to specify the permissions that this training job will have. For our purposes we can use\n",
-    "    # the same permissions that our current SageMaker session has.\n",
-    "    \"RoleArn\": role,\n",
-    "    \n",
-    "    # Here we describe the algorithm we wish to use. The most important part is the container which\n",
-    "    # contains the training code.\n",
-    "    \"AlgorithmSpecification\": {\n",
-    "        \"TrainingImage\": container,\n",
-    "        \"TrainingInputMode\": \"File\"\n",
-    "    },\n",
-    "    \n",
-    "    # Next we set the algorithm specific hyperparameters. You may wish to change these to see what effect\n",
-    "    # there is on the resulting model.\n",
-    "    \"HyperParameters\": {\n",
-    "        \"max_depth\": \"5\",\n",
-    "        \"eta\": \"0.2\",\n",
-    "        \"gamma\": \"4\",\n",
-    "        \"min_child_weight\": \"6\",\n",
-    "        \"subsample\": \"0.8\",\n",
-    "        \"objective\": \"reg:linear\",\n",
-    "        \"early_stopping_rounds\": \"10\",\n",
-    "        \"num_round\": \"200\"\n",
-    "    },\n",
+    "training_params = {}\n",
+    "\n",
+    "# We need to specify the permissions that this training job will have. For our purposes we can use\n",
+    "# the same permissions that our current SageMaker session has.\n",
+    "training_params['RoleArn'] = role\n",
+    "\n",
+    "# Here we describe the algorithm we wish to use. The most important part is the container which\n",
+    "# contains the training code.\n",
+    "training_params['AlgorithmSpecification'] = {\n",
+    "    \"TrainingImage\": container,\n",
+    "    \"TrainingInputMode\": \"File\"\n",
+    "}\n",
+    "\n",
+    "# We also need to say where we would like the resulting model artifacts stored.\n",
+    "training_params['OutputDataConfig'] = {\n",
+    "    \"S3OutputPath\": \"s3://\" + session.default_bucket() + \"/\" + prefix + \"/output\"\n",
+    "}\n",
+    "\n",
+    "# We also need to set some parameters for the training job itself. Namely we need to describe what sort of\n",
+    "# compute instance we wish to use along with a stopping condition to handle the case that there is\n",
+    "# some sort of error and the training script doesn't terminate.\n",
+    "training_params['ResourceConfig'] = {\n",
+    "    \"InstanceCount\": 1,\n",
+    "    \"InstanceType\": \"ml.m4.xlarge\",\n",
+    "    \"VolumeSizeInGB\": 5\n",
+    "}\n",
     "    \n",
-    "    # Now we need to tell SageMaker where the data should be retrieved from and where to save the\n",
-    "    # resulting model artifacts.\n",
-    "    \"InputDataConfig\": [\n",
-    "        {\n",
-    "            \"ChannelName\": \"train\",\n",
-    "            \"DataSource\": {\n",
-    "                \"S3DataSource\": {\n",
-    "                    \"S3DataType\": \"S3Prefix\",\n",
-    "                    \"S3Uri\": train_location,\n",
-    "                    \"S3DataDistributionType\": \"FullyReplicated\"\n",
-    "                }\n",
-    "            },\n",
-    "            \"ContentType\": \"csv\",\n",
-    "            \"CompressionType\": \"None\"\n",
+    "training_params['StoppingCondition'] = {\n",
+    "    \"MaxRuntimeInSeconds\": 86400\n",
+    "}\n",
+    "\n",
+    "# Next we set the algorithm specific hyperparameters. You may wish to change these to see what effect\n",
+    "# there is on the resulting model.\n",
+    "training_params['HyperParameters'] = {\n",
+    "    \"max_depth\": \"5\",\n",
+    "    \"eta\": \"0.2\",\n",
+    "    \"gamma\": \"4\",\n",
+    "    \"min_child_weight\": \"6\",\n",
+    "    \"subsample\": \"0.8\",\n",
+    "    \"objective\": \"reg:linear\",\n",
+    "    \"early_stopping_rounds\": \"10\",\n",
+    "    \"num_round\": \"200\"\n",
+    "}\n",
+    "\n",
+    "# Now we need to tell SageMaker where the data should be retrieved from.\n",
+    "training_params['InputDataConfig'] = [\n",
+    "    {\n",
+    "        \"ChannelName\": \"train\",\n",
+    "        \"DataSource\": {\n",
+    "            \"S3DataSource\": {\n",
+    "                \"S3DataType\": \"S3Prefix\",\n",
+    "                \"S3Uri\": train_location,\n",
+    "                \"S3DataDistributionType\": \"FullyReplicated\"\n",
+    "            }\n",
     "        },\n",
-    "        {\n",
-    "            \"ChannelName\": \"validation\",\n",
-    "            \"DataSource\": {\n",
-    "                \"S3DataSource\": {\n",
-    "                    \"S3DataType\": \"S3Prefix\",\n",
-    "                    \"S3Uri\": val_location,\n",
-    "                    \"S3DataDistributionType\": \"FullyReplicated\"\n",
-    "                }\n",
-    "            },\n",
-    "            \"ContentType\": \"csv\",\n",
-    "            \"CompressionType\": \"None\"\n",
-    "        }\n",
-    "    ],\n",
-    "    \n",
-    "    \"OutputDataConfig\": {\n",
-    "        \"S3OutputPath\": \"s3://\" + session.default_bucket() + \"/\" + prefix + \"/output\"\n",
-    "    },\n",
-    "    \n",
-    "    # Lastly we set some parameters for the training job itself. Namely we need to describe what sort of\n",
-    "    # compute instance we wish to use along with a stopping condition to handle the case that there is\n",
-    "    # some sort of error and the training script doesn't terminate.\n",
-    "    \"ResourceConfig\": {\n",
-    "        \"InstanceCount\": 1,\n",
-    "        \"InstanceType\": \"ml.m4.xlarge\",\n",
-    "        \"VolumeSizeInGB\": 5\n",
+    "        \"ContentType\": \"csv\",\n",
+    "        \"CompressionType\": \"None\"\n",
     "    },\n",
-    "    \n",
-    "    \"StoppingCondition\": {\n",
-    "        \"MaxRuntimeInSeconds\": 86400\n",
+    "    {\n",
+    "        \"ChannelName\": \"validation\",\n",
+    "        \"DataSource\": {\n",
+    "            \"S3DataSource\": {\n",
+    "                \"S3DataType\": \"S3Prefix\",\n",
+    "                \"S3Uri\": val_location,\n",
+    "                \"S3DataDistributionType\": \"FullyReplicated\"\n",
+    "            }\n",
+    "        },\n",
+    "        \"ContentType\": \"csv\",\n",
+    "        \"CompressionType\": \"None\"\n",
     "    }\n",
-    "}"
+    "]"
    ]
   },
   {