Add files via upload

Aaron England · web-flow · commit ffd36c805388 · 2019-05-09T05:44:46.000-07:00
diff --git a/Chapter 2/Activities/Activity_03.py b/Chapter 2/Activities/Activity_03.py
@@ -2,11 +2,8 @@
 
 # continuing from Exercise 9:
 
-# generate predicted probabilities of rain
-predicted_prob = model.predict_proba(X_test)[:,1]
-
 # generate predicted classes
-predicted_class = model.predict(X_test)
+predicted_class = model.predict(X_test_scaled)
 
 # evaluate performance with confusion matrix
 from sklearn.metrics import confusion_matrix
diff --git a/Chapter 2/Activities/Activity_04.py b/Chapter 2/Activities/Activity_04.py
@@ -1,4 +1,4 @@
-# Activity 4: Prepare data for decision tree classifier pipeline
+# Activity 4: Prepare data for decision tree classifier
 
 # clear environment prior to running this code
 
@@ -23,5 +23,9 @@
 from sklearn.model_selection import train_test_split
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
 
-
+# scale X_train and X_test (fit the scaler on the training split only, to avoid test-set leakage)
+from sklearn.preprocessing import StandardScaler
+model = StandardScaler()
+X_train_scaled = model.fit_transform(X_train)  # learn mean/std from the training data and apply them
+X_test_scaled = model.transform(X_test)  # reuse the training mean/std; never refit on test data
 
diff --git a/Chapter 2/Activities/Activity_05.py b/Chapter 2/Activities/Activity_05.py
@@ -3,10 +3,10 @@
 # continuing from Exercise 11:
 
 # generate predicted probabilities of rain
-predicted_prob = model.predict_proba(X_test)[:,1]
+predicted_prob = model.predict_proba(X_test_scaled)[:,1]
 
 # generate predicted classes
-predicted_class = model.predict(X_test)
+predicted_class = model.predict(X_test_scaled)
 
 # evaluate performance with confusion matrix
 from sklearn.metrics import confusion_matrix
diff --git a/Chapter 2/Activities/Activity_06.py b/Chapter 2/Activities/Activity_06.py
@@ -2,29 +2,21 @@
 
 # continuing from Exercise 12
 
-# Set up the steps for a pipeline
-from sklearn.preprocessing import StandardScaler
-from sklearn.ensemble import RandomForestRegressor
-steps = [('scaler', StandardScaler()), ('Forest', RandomForestRegressor(n_estimators=10))] 
-
-# Setup the pipeline
-from sklearn.pipeline import Pipeline
-pipeline = Pipeline(steps)
-
 # Specify the hyperparameter space
 import numpy as np
-parameters = {'Forest__criterion': ['mse','mae'],
-              'Forest__max_features': ['auto', 'sqrt', 'log2', None],
-              'Forest__min_impurity_decrease': np.linspace(0.0, 1.0, 10),
-              'Forest__bootstrap': [True, False],
-              'Forest__warm_start': [True, False]}
+grid = {'criterion': ['mse','mae'],  # candidate values for each estimator parameter; passed to GridSearchCV below
+        'max_features': ['auto', 'sqrt', 'log2', None],
+        'min_impurity_decrease': np.linspace(0.0, 1.0, 10),  # 10 evenly spaced values from 0.0 to 1.0 inclusive
+        'bootstrap': [True, False],
+        'warm_start': [True, False]}  # 2 * 4 * 10 * 2 * 2 = 320 parameter combinations in total
 
 # Instantiate the GridSearchCV model
 from sklearn.model_selection import GridSearchCV
-model = GridSearchCV(pipeline, parameters, scoring='explained_variance', cv=5)
+from sklearn.ensemble import RandomForestRegressor
+model = GridSearchCV(RandomForestRegressor(), grid, scoring='explained_variance', cv=5)
 
 # Fit to the training set
-model.fit(X_train, y_train)
+model.fit(X_train_scaled, y_train)
 
 # Print the tuned parameters
 best_parameters = model.best_params_