Add OneHotEncoder and create configs that support sparse matrices
teaearlgraycold committed May 24, 2017
commit 00f9abefbdcf5f490074988f2d265722257352c2
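For context, a minimal usage sketch (not part of this commit) of how a custom configuration dictionary such as the classifier_config_sparse added below could be handed to TPOT. The config_dict parameter and the fit call are assumptions about the surrounding TPOT API, not changes made here.

# Sketch only: restrict TPOT's search to the sparse-friendly operators below.
from tpot import TPOTClassifier
from tpot.config_classifier_sparse import classifier_config_sparse

tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2,
                      config_dict=classifier_config_sparse)
# X_train may be a scipy.sparse matrix when using this configuration.
# tpot.fit(X_train, y_train)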
1 change: 1 addition & 0 deletions tpot/built_in_operators.py
@@ -20,6 +20,7 @@
"""

import numpy as np

from sklearn.base import BaseEstimator
from sklearn.utils import check_array

114 changes: 114 additions & 0 deletions tpot/config_classifier_sparse.py
@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-

"""Copyright 2015-Present Randal S. Olson.

This file is part of the TPOT library.

TPOT is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.

TPOT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with TPOT. If not, see <http://www.gnu.org/licenses/>.
"""

import numpy as np

classifier_config_sparse = {
    'tpot.one_hot_encoder.OneHotEncoder': {
    },

    'sklearn.neighbors.KNeighborsClassifier': {
        'n_neighbors': range(1, 101),
        'weights': ["uniform", "distance"],
        'p': [1, 2]
    },

    'sklearn.ensemble.RandomForestClassifier': {
        'n_estimators': [100],
        'criterion': ["gini", "entropy"],
        'max_features': np.arange(0.05, 1.01, 0.05),
        'min_samples_split': range(2, 21),
        'min_samples_leaf': range(1, 21),
        'bootstrap': [True, False]
    },

    'sklearn.feature_selection.SelectFwe': {
        'alpha': np.arange(0, 0.05, 0.001),
        'score_func': {
            'sklearn.feature_selection.f_classif': None
        }
    },

    'sklearn.feature_selection.SelectPercentile': {
        'percentile': range(1, 100),
        'score_func': {
            'sklearn.feature_selection.f_classif': None
        }
    },

    'sklearn.feature_selection.VarianceThreshold': {
        'threshold': np.arange(0.05, 1.01, 0.05)
    },

    'sklearn.feature_selection.RFE': {
        'step': np.arange(0.05, 1.01, 0.05),
        'estimator': {
            'sklearn.ensemble.ExtraTreesClassifier': {
                'n_estimators': [100],
                'criterion': ['gini', 'entropy'],
                'max_features': np.arange(0.05, 1.01, 0.05)
            }
        }
    },

    'sklearn.feature_selection.SelectFromModel': {
        'threshold': np.arange(0, 1.01, 0.05),
        'estimator': {
            'sklearn.ensemble.ExtraTreesClassifier': {
                'n_estimators': [100],
                'criterion': ['gini', 'entropy'],
                'max_features': np.arange(0.05, 1.01, 0.05)
            }
        }
    },

    'sklearn.linear_model.LogisticRegression': {
        'penalty': ["l1", "l2"],
        'C': [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
        'dual': [True, False]
    },

    'sklearn.naive_bayes.BernoulliNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    },

    'sklearn.naive_bayes.MultinomialNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    },

    'sklearn.svm.LinearSVC': {
        'penalty': ["l1", "l2"],
        'loss': ["hinge", "squared_hinge"],
        'dual': [True, False],
        'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        'C': [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.]
    },

    'xgboost.XGBClassifier': {
        'n_estimators': [100],
        'max_depth': range(1, 11),
        'learning_rate': [1e-3, 1e-2, 1e-1, 0.5, 1.],
        'subsample': np.arange(0.05, 1.01, 0.05),
        'min_child_weight': range(1, 21),
        'nthread': [1]
    }
}
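As a reading aid (illustration only, not TPOT's actual instantiation code): each top-level key names an operator by its import path, each value maps hyperparameter names to candidate values, and a nested dictionary such as 'estimator' or 'score_func' describes a sub-estimator or scoring callable in the same format. One possible concrete sample from the SelectFromModel entry above:

# Hypothetical sample drawn from the search space declared above:
# threshold from np.arange(0, 1.01, 0.05) and an ExtraTreesClassifier
# sub-estimator with criterion 'gini' and max_features 0.5.
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

selector = SelectFromModel(
    estimator=ExtraTreesClassifier(n_estimators=100, criterion='gini',
                                   max_features=0.5),
    threshold=0.25)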
2 changes: 0 additions & 2 deletions tpot/config_regressor.py
@@ -23,7 +23,6 @@

regressor_config_dict = {


    'sklearn.linear_model.ElasticNetCV': {
        'l1_ratio': np.arange(0.0, 1.01, 0.05),
        'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
@@ -91,7 +90,6 @@
    'sklearn.linear_model.RidgeCV': {
    },


    'xgboost.XGBRegressor': {
        'n_estimators': [100],
        'max_depth': range(1, 11),
92 changes: 92 additions & 0 deletions tpot/config_regressor_sparse.py
@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-

"""Copyright 2015-Present Randal S. Olson.

This file is part of the TPOT library.

TPOT is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.

TPOT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with TPOT. If not, see <http://www.gnu.org/licenses/>.
"""

import numpy as np

regressor_config_sparse = {
    'tpot.one_hot_encoder.OneHotEncoder': {
    },

    'sklearn.neighbors.KNeighborsRegressor': {
        'n_neighbors': range(1, 101),
        'weights': ["uniform", "distance"],
        'p': [1, 2]
    },

    'sklearn.ensemble.RandomForestRegressor': {
        'n_estimators': [100],
        'max_features': np.arange(0.05, 1.01, 0.05),
        'min_samples_split': range(2, 21),
        'min_samples_leaf': range(1, 21),
        'bootstrap': [True, False]
    },

    'sklearn.feature_selection.SelectFwe': {
        'alpha': np.arange(0, 0.05, 0.001),
        'score_func': {
            'sklearn.feature_selection.f_classif': None
        }
    },

    'sklearn.feature_selection.SelectPercentile': {
        'percentile': range(1, 100),
        'score_func': {
            'sklearn.feature_selection.f_classif': None
        }
    },

    'sklearn.feature_selection.VarianceThreshold': {
        'threshold': np.arange(0.05, 1.01, 0.05)
    },

    'sklearn.feature_selection.SelectFromModel': {
        'threshold': np.arange(0, 1.01, 0.05),
        'estimator': {
            'sklearn.ensemble.ExtraTreesRegressor': {
                'n_estimators': [100],
                'max_features': np.arange(0.05, 1.01, 0.05)
            }
        }
    },

    'sklearn.linear_model.ElasticNetCV': {
        'l1_ratio': np.arange(0.0, 1.01, 0.05),
        'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    },

    'sklearn.linear_model.RidgeCV': {
    },

    'sklearn.svm.LinearSVR': {
        'loss': ["epsilon_insensitive", "squared_epsilon_insensitive"],
        'dual': [True, False],
        'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        'C': [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
        'epsilon': [1e-4, 1e-3, 1e-2, 1e-1, 1.]
    },

    'xgboost.XGBRegressor': {
        'n_estimators': [100],
        'max_depth': range(1, 11),
        'learning_rate': [1e-3, 1e-2, 1e-1, 0.5, 1.],
        'subsample': np.arange(0.05, 1.01, 0.05),
        'min_child_weight': range(1, 21),
        'nthread': [1]
    }
}