Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
a79079b
fix bug in config dict
Apr 28, 2017
fa1acfb
Update base.py
rhiever Apr 28, 2017
5fab09a
Merge pull request #431 from weixuanfu2016/config_dict_patch
rhiever Apr 28, 2017
ed3bdf7
Update version for minor release
rhiever Apr 28, 2017
5e32488
use stopit replace Interruptable_cross_val_score
May 1, 2017
39cff19
Update requirements.txt
rhiever May 1, 2017
eafc240
fix bugs and clean bugs
May 1, 2017
e50814c
clean test codes
May 1, 2017
c10ba2e
add unit test
May 1, 2017
6bc031c
try backend="threading"
May 1, 2017
3019275
dask works in macOS
May 1, 2017
0b3680c
clean codes
May 1, 2017
91caa55
num_worker added
May 1, 2017
6b00655
use client
May 1, 2017
d9c1863
threading
May 1, 2017
4517abb
clean codes
May 1, 2017
94b4a37
clean codes
May 1, 2017
3ce128b
clean codes
May 1, 2017
02fd277
clean codes
May 1, 2017
af98b96
clean codes
May 1, 2017
9cafac7
return to joblib
May 2, 2017
5447fe0
key works
May 2, 2017
1f97655
fix issue in large dataset
May 2, 2017
23ca6d3
Merge remote-tracking branch 'upstream/development' into joblib_timeout
May 2, 2017
3ce4a30
add doc
May 2, 2017
6515732
clean codes
May 2, 2017
633e9e8
min to sec
May 2, 2017
ac77725
manual dump memmap
May 2, 2017
dd7df4e
clean codes
May 2, 2017
a6ff510
dask array tet
May 2, 2017
dcf640e
jobs test
May 2, 2017
4d87038
add warning for large dataset
May 2, 2017
ec96ecd
add doc and installnation
May 2, 2017
7978f7d
instal in test
May 2, 2017
24c030f
pip install dask
May 2, 2017
0382753
pip install dask[complete]
May 2, 2017
ac3a086
clean codes
May 3, 2017
39ac993
better get
May 4, 2017
224a9bc
clean codes
May 4, 2017
c20d911
fix conflict
May 12, 2017
a4956d4
warning when verbosity > 2
May 12, 2017
7cea3bf
fix this compatibility issue
May 16, 2017
454f54a
add unit test
May 16, 2017
dc40489
fix ci
May 16, 2017
1fc2860
Merge pull request #451 from weixuanfu2016/mdr_dict_master_fix
rhiever May 18, 2017
18927b0
Version increment for hot patch release
rhiever May 18, 2017
568f55d
fix bug for ploynomialfeatures
May 19, 2017
37c1529
add unit test
May 19, 2017
179fdf1
Merge pull request #455 from weixuanfu2016/issue454
rhiever May 19, 2017
7b1eb27
Minor version increment for release
rhiever May 19, 2017
fd2f1c3
Update tests.py
rhiever May 19, 2017
c3b2167
Merge branch 'development' into joblib_timeout
rhiever May 23, 2017
cccf676
fix conflicts
May 23, 2017
211eed9
Merge branch 'development' into joblib_timeout
May 23, 2017
00fc6ff
add patch in master
May 23, 2017
1e0a8c4
add patch in tpot 0.7.5
May 23, 2017
d8e1904
clean codes
May 23, 2017
af01d55
add some small unit tests for increasing coverage
May 23, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):

assert isclose(known_score, score)


def test_score_3():
"""Assert that the TPOTRegressor score function outputs a known score for a fix pipeline"""

Expand Down Expand Up @@ -293,6 +294,7 @@ def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):

assert isclose(known_score, score)


def test_sample_weight_func():
"""Assert that the TPOTRegressor score function outputs a known score for a fixed pipeline with sample weights"""

Expand Down Expand Up @@ -342,6 +344,7 @@ def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
assert not np.allclose(cv_score1, cv_score_weight)
assert isclose(known_score, score)


def test_predict():
"""Assert that the TPOT predict function raises a RuntimeError when no optimized pipeline exists"""

Expand Down Expand Up @@ -369,6 +372,7 @@ def test_predict_2():

assert result.shape == (testing_features.shape[0],)


def test_predict_proba():
"""Assert that the TPOT predict_proba function returns a numpy matrix of shape (num_testing_rows, num_testing_classes)"""

Expand Down Expand Up @@ -410,6 +414,7 @@ def test_predict_proba2():
except Exception:
assert False


def test_warm_start():
"""Assert that the TPOT warm_start flag stores the pop and pareto_front from the first run"""
tpot_obj = TPOTClassifier(random_state=42, population_size=1, offspring_size=2, generations=1, verbosity=0, warm_start=True)
Expand Down Expand Up @@ -582,6 +587,34 @@ def test_generate_import_code():
"""
assert expected_code == generate_import_code(pipeline, tpot_obj.operators)


def test_PolynomialFeatures_exception():
    """Assert that _evaluate_individuals flags a pipeline with more than one PolynomialFeatures operator as invalid.

    Two individuals are evaluated together: one pipeline containing a single
    PolynomialFeatures operator, which must receive its known fitness tuple,
    and one pipeline containing two nested PolynomialFeatures operators,
    which must receive the fitness tuple (5000., -inf).
    """
    # Pipeline containing exactly one PolynomialFeatures operator
    single_poly_pipeline = ('LogisticRegression(PolynomialFeatures'
                            '(input_matrix, PolynomialFeatures__degree=2, PolynomialFeatures__include_bias=DEFAULT, '
                            'PolynomialFeatures__interaction_only=False), LogisticRegression__C=10.0, '
                            'LogisticRegression__dual=DEFAULT, LogisticRegression__penalty=DEFAULT)')

    # Pipeline containing two nested PolynomialFeatures operators
    double_poly_pipeline = ('LogisticRegression(PolynomialFeatures'
                            '(PolynomialFeatures(input_matrix, PolynomialFeatures__degree=2, '
                            'PolynomialFeatures__include_bias=DEFAULT, PolynomialFeatures__interaction_only=False), '
                            'PolynomialFeatures__degree=2, PolynomialFeatures__include_bias=DEFAULT, '
                            'PolynomialFeatures__interaction_only=False), LogisticRegression__C=10.0, '
                            'LogisticRegression__dual=DEFAULT, LogisticRegression__penalty=DEFAULT)')

    tpot_obj = TPOTClassifier()
    # A disabled progress bar, so _evaluate_individuals has a _pbar to use
    tpot_obj._pbar = tqdm(total=1, disable=True)

    # Build the individuals handed to _evaluate_individuals, in the same order as the expected scores
    individuals = [
        creator.Individual.from_string(pipeline_string, tpot_obj._pset)
        for pipeline_string in (single_poly_pipeline, double_poly_pipeline)
    ]

    expected_scores = [(2, 0.98068077235290885), (5000.0, -float('inf'))]
    actual_scores = tpot_obj._evaluate_individuals(individuals, training_features, training_classes)
    assert np.allclose(expected_scores, actual_scores)


def test_mutNodeReplacement():
"""Assert that mutNodeReplacement() returns the correct type of mutation node in a fixed pipeline"""
tpot_obj = TPOTClassifier()
Expand Down Expand Up @@ -649,6 +682,7 @@ def test_export_pipeline():
"""
assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)


def test_export_pipeline_2():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline (only one classifier)"""
tpot_obj = TPOTClassifier()
Expand All @@ -673,6 +707,7 @@ def test_export_pipeline_2():
"""
assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)


def test_export_pipeline_3():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with a preprocessor"""
tpot_obj = TPOTClassifier()
Expand Down Expand Up @@ -704,6 +739,7 @@ def test_export_pipeline_3():
"""
assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)


def test_operator_export():
"""Assert that a TPOT operator can export properly with a function as a parameter to a classifier"""
export_string = TPOTSelectKBest.export(5)
Expand Down
5 changes: 3 additions & 2 deletions tpot/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,13 +705,13 @@ def _evaluate_individuals(self, individuals, features, classes, sample_weight =
# This is a fairly hacky way to prevent TPOT from getting stuck on bad pipelines and should be improved in a future release
individual = individuals[indidx]
individual_str = str(individual)
if individual_str.count('PolynomialFeatures') > 1:
sklearn_pipeline_str = generate_pipeline_code(expr_to_tree(individual, self._pset), self.operators)
if sklearn_pipeline_str.count('PolynomialFeatures') > 1:
if self.verbosity > 2:
self._pbar.write('Invalid pipeline encountered. Skipping its evaluation.')
fitnesses_dict[indidx] = (5000., -float('inf'))
if not self._pbar.disable:
self._pbar.update(1)

# Check if the individual was evaluated before
elif individual_str in self._evaluated_individuals:
# Get fitness score from previous evaluation
Expand All @@ -726,6 +726,7 @@ def _evaluate_individuals(self, individuals, features, classes, sample_weight =
# Transform the tree expression into an sklearn pipeline
sklearn_pipeline = self._toolbox.compile(expr=individual)


# Fix random state when the operator allows and build sample weight dictionary
self._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42)

Expand Down