|
460 | 460 | " name=\"Merge Taxi Data\",\n", |
461 | 461 | " script_name=\"merge.py\", \n", |
462 | 462 | " arguments=[\"--output_merge\", merged_data],\n", |
463 | | - " inputs=[cleansed_green_data.parse_parquet_files(file_extension=None),\n", |
464 | | - " cleansed_yellow_data.parse_parquet_files(file_extension=None)],\n", |
| 463 | + " inputs=[cleansed_green_data.parse_parquet_files(),\n", |
| 464 | + " cleansed_yellow_data.parse_parquet_files()],\n", |
465 | 465 | " outputs=[merged_data],\n", |
466 | 466 | " compute_target=aml_compute,\n", |
467 | 467 | " runconfig=aml_run_config,\n", |
|
497 | 497 | " name=\"Filter Taxi Data\",\n", |
498 | 498 | " script_name=\"filter.py\", \n", |
499 | 499 | " arguments=[\"--output_filter\", filtered_data],\n", |
500 | | - " inputs=[merged_data.parse_parquet_files(file_extension=None)],\n", |
| 500 | + " inputs=[merged_data.parse_parquet_files()],\n", |
501 | 501 | " outputs=[filtered_data],\n", |
502 | 502 | " compute_target=aml_compute,\n", |
503 | 503 | " runconfig = aml_run_config,\n", |
|
533 | 533 | " name=\"Normalize Taxi Data\",\n", |
534 | 534 | " script_name=\"normalize.py\", \n", |
535 | 535 | " arguments=[\"--output_normalize\", normalized_data],\n", |
536 | | - " inputs=[filtered_data.parse_parquet_files(file_extension=None)],\n", |
| 536 | + " inputs=[filtered_data.parse_parquet_files()],\n", |
537 | 537 | " outputs=[normalized_data],\n", |
538 | 538 | " compute_target=aml_compute,\n", |
539 | 539 | " runconfig = aml_run_config,\n", |
|
574 | 574 | " name=\"Transform Taxi Data\",\n", |
575 | 575 | " script_name=\"transform.py\", \n", |
576 | 576 | " arguments=[\"--output_transform\", transformed_data],\n", |
577 | | - " inputs=[normalized_data.parse_parquet_files(file_extension=None)],\n", |
| 577 | + " inputs=[normalized_data.parse_parquet_files()],\n", |
578 | 578 | " outputs=[transformed_data],\n", |
579 | 579 | " compute_target=aml_compute,\n", |
580 | 580 | " runconfig = aml_run_config,\n", |
|
614 | 614 | " script_name=\"train_test_split.py\", \n", |
615 | 615 | " arguments=[\"--output_split_train\", output_split_train,\n", |
616 | 616 | " \"--output_split_test\", output_split_test],\n", |
617 | | - " inputs=[transformed_data.parse_parquet_files(file_extension=None)],\n", |
| 617 | + " inputs=[transformed_data.parse_parquet_files()],\n", |
618 | 618 | " outputs=[output_split_train, output_split_test],\n", |
619 | 619 | " compute_target=aml_compute,\n", |
620 | 620 | " runconfig = aml_run_config,\n", |
|
690 | 690 | " \"n_cross_validations\": 5\n", |
691 | 691 | "}\n", |
692 | 692 | "\n", |
693 | | - "training_dataset = output_split_train.parse_parquet_files(file_extension=None).keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n", |
| 693 | + "training_dataset = output_split_train.parse_parquet_files().keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n", |
694 | 694 | "\n", |
695 | 695 | "automl_config = AutoMLConfig(task = 'regression',\n", |
696 | 696 | " debug_log = 'automated_ml_errors.log',\n", |
|
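Taken together, these hunks all follow the same step-chaining pattern: an upstream PythonScriptStep writes parquet files to a pipeline output, and the downstream step consumes them by calling parse_parquet_files() on that output to obtain a tabular dataset. The following is a minimal sketch of that pattern, not the notebook's literal code; it assumes azureml-sdk v1, the PipelineData(...).as_dataset() promotion used for these outputs, and that datastore, aml_compute, and aml_run_config are already defined in earlier cells.

    from azureml.pipeline.core import PipelineData
    from azureml.pipeline.steps import PythonScriptStep

    # Outputs promoted to datasets so the parquet-parsing call is available downstream.
    merged_data = PipelineData("merged_data", datastore=datastore).as_dataset()
    filtered_data = PipelineData("filtered_data", datastore=datastore).as_dataset()

    filter_step = PythonScriptStep(
        name="Filter Taxi Data",
        script_name="filter.py",
        arguments=["--output_filter", filtered_data],
        # parse_parquet_files() exposes the upstream step's parquet output
        # as a tabular dataset that this step can read.
        inputs=[merged_data.parse_parquet_files()],
        outputs=[filtered_data],
        compute_target=aml_compute,
        runconfig=aml_run_config,
    )

The same shape repeats for the merge, normalize, transform, and train/test split steps above, and the split step's parsed output is what keep_columns() trims before it is handed to AutoMLConfig.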