Skip to content

Commit 77f5a69

Browse files
update samples from Release-120 as a part of SDK release (Azure#1676)
Co-authored-by: amlrelsa-ms <[email protected]>
1 parent ce82af2 commit 77f5a69

File tree

1 file changed

+2
-14
lines changed
  • how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/scripts/prepdata

1 file changed

+2
-14
lines changed

how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/scripts/prepdata/cleanse.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,13 @@
55
import os
66
from azureml.core import Run
77

8-
9-
def get_dict(dict_str):
10-
pairs = dict_str.strip("{}").split(r'\;')
11-
new_dict = {}
12-
for pair in pairs:
13-
key, value = pair.strip().split(":")
14-
new_dict[key.strip().strip("'")] = value.strip().strip("'")
15-
16-
return new_dict
17-
18-
198
print("Cleans the input data")
209

2110
# Get the input green_taxi_data. To learn more about how to access datasets in your script, please
2211
# see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-with-datasets.
2312
run = Run.get_context()
2413
raw_data = run.input_datasets["raw_data"]
2514

26-
2715
parser = argparse.ArgumentParser("cleanse")
2816
parser.add_argument("--output_cleanse", type=str, help="cleaned taxi data directory")
2917
parser.add_argument("--useful_columns", type=str, help="useful columns to keep")
@@ -38,8 +26,8 @@ def get_dict(dict_str):
3826
# These functions ensure that null data is removed from the dataset,
3927
# which will help increase machine learning model accuracy.
4028

41-
useful_columns = [s.strip().strip("'") for s in args.useful_columns.strip("[]").split(r'\;')]
42-
columns = get_dict(args.columns)
29+
useful_columns = eval(args.useful_columns.replace(';', ','))
30+
columns = eval(args.columns.replace(';', ','))
4331

4432
new_df = (raw_data.to_pandas_dataframe()
4533
.dropna(how='all')

0 commit comments

Comments
 (0)