File tree (Expand file tree / Collapse file tree): 1 file changed, +2 -14 lines changed
how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/scripts/prepdata (Expand file tree / Collapse file tree): 1 file changed, +2 -14 lines changed
Original file line number | Diff line number | Diff line change
55import os
66from azureml .core import Run
77
8-
9- def get_dict (dict_str ):
10- pairs = dict_str .strip ("{}" ).split (r'\;' )
11- new_dict = {}
12- for pair in pairs :
13- key , value = pair .strip ().split (":" )
14- new_dict [key .strip ().strip ("'" )] = value .strip ().strip ("'" )
15-
16- return new_dict
17-
18-
198print ("Cleans the input data" )
209
2110# Get the input green_taxi_data. To learn more about how to access a dataset in your script, please
2211# see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-with-datasets.
2312run = Run .get_context ()
2413raw_data = run .input_datasets ["raw_data" ]
2514
26-
2715parser = argparse .ArgumentParser ("cleanse" )
2816parser .add_argument ("--output_cleanse" , type = str , help = "cleaned taxi data directory" )
2917parser .add_argument ("--useful_columns" , type = str , help = "useful columns to keep" )
@@ -38,8 +26,8 @@ def get_dict(dict_str):
3826# These functions ensure that null data is removed from the dataset,
3927# which will help increase machine learning model accuracy.
4028
41- useful_columns = [ s . strip (). strip ( "'" ) for s in args .useful_columns .strip ( "[]" ). split ( r'\;' )]
42- columns = get_dict (args .columns )
29+ useful_columns = eval ( args .useful_columns .replace ( ';' , ',' ))
30+ columns = eval (args .columns . replace ( ';' , ',' ) )
4331
4432new_df = (raw_data .to_pandas_dataframe ()
4533 .dropna (how = 'all' )
You can’t perform that action at this time.
0 commit comments