Fix weight column issue

ksalama · ksalama · commit 36e553709785 · 2019-04-09T13:50:31.000+01:00
diff --git a/00_Miscellaneous/tfx/02_tfx_end_to_end.ipynb b/00_Miscellaneous/tfx/02_tfx_end_to_end.ipynb
@@ -14,7 +14,7 @@
     "3. Model training with **TF Estimator**.\n",
     "4. Model evaluation with **TF Model Analysis**.\n",
     "\n",
-    "<a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/sme_academy/02_tfx_end_to_end.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+    "<a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/tf-estimator-tutorials/blob/master/00_Miscellaneous/tfx/02_tfx_end_to_end.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
    ]
   },
   {
@@ -469,7 +469,8 @@
     "    for feature in raw_schema.feature:\n",
     "      feature_name = feature.name\n",
     "      \n",
-    "      if feature_name in ['income_bracket', 'fnlwgt']:\n",
+    "      # Pass the target feature as is.\n",
+    "      if feature_name == TARGET_FEATURE_NAME:\n",
     "        processed_features[feature_name] = input_features[feature_name]\n",
     "        continue\n",
     "\n",
@@ -480,6 +481,9 @@
     "        # normalize numeric features.\n",
     "        processed_features[feature_name+\"_scaled\"] = tft.scale_to_z_score(input_features[feature_name])\n",
     "\n",
+    "    # Pass the weight column\n",
+    "    processed_features[WEIGHT_COLUMN_NAME] = input_features[WEIGHT_COLUMN_NAME]\n",
+    "\n",
     "    # Bucketize age using quantiles. \n",
     "    quantiles = tft.quantiles(input_features[\"age\"], num_buckets=5, epsilon=0.01)\n",
     "    processed_features[\"age_bucketized\"] = tft.apply_buckets(\n",
@@ -536,6 +540,9 @@
     "  # Load TFDV schema and create tft schema from it.\n",
     "  source_raw_schema = tfdv.load_schema_text(raw_schema_location)\n",
     "  raw_feature_spec = schema_utils.schema_as_feature_spec(source_raw_schema).feature_spec\n",
+    "  # Since the raw_feature_spec doesn't include the weight column, we need to add it. \n",
+    "  raw_feature_spec[WEIGHT_COLUMN_NAME] = tf.FixedLenFeature(\n",
+    "    shape=[1], dtype=tf.int64, default_value=None)\n",
     "  raw_metadata = dataset_metadata.DatasetMetadata(\n",
     "    dataset_schema.from_feature_spec(raw_feature_spec))\n",
     "\n",
@@ -1096,7 +1103,6 @@
     "  source_raw_schema = tfdv.load_schema_text(RAW_SCHEMA_LOCATION)\n",
     "  raw_feature_spec = schema_utils.schema_as_feature_spec(source_raw_schema).feature_spec\n",
     "  raw_feature_spec.pop(TARGET_FEATURE_NAME)\n",
-    "  raw_feature_spec.pop(WEIGHT_COLUMN_NAME)\n",
     "\n",
     "  # Create the interface for the serving function with the raw features\n",
     "  raw_features = tf.estimator.export.build_parsing_serving_input_receiver_fn(raw_feature_spec)().features\n",
@@ -1137,7 +1143,7 @@
     "        \n",
     "estimator.export_savedmodel(\n",
     "    export_dir_base=export_dir,\n",
-    "    serving_input_receiver_fn=input_receiver_fn\n",
+    "    serving_input_receiver_fn=serving_input_receiver_fn\n",
     ")"
    ]
   },