This project performs name matching using a trained Random Forest model.
Make sure you have Python and virtualenv installed.
pip install virtualenv
# Create a virtual environment
virtualenv myenv
# Activate the environment
# macOS/Linux:
source myenv/bin/activate
# Windows (Command Prompt):
myenv\Scripts\activate
# Install the requirements file given
pip install -r requirements.txt
# Train the model
python name_matching.py train --train_data_path data/STrain.csv --truth_labels_path data/G.csv --model_output_path rf_model.pkl
Arguments:
--train_data_path: path to the training dataset (CSV, txt)
--truth_labels_path: path to the ground-truth labels (CSV, txt)
--model_output_path: path where the model pickle will be saved (.pkl)
# Run predictions with the trained model
python name_matching.py predict --test_file_path data/STest.csv --truth_labels_path data/G.csv --model_path rf_model.pkl --output_path predictions.txt --num_entries 10000
Arguments:
--test_file_path: path to the test dataset (CSV, txt)
--truth_labels_path: path to the ground-truth labels (CSV, txt)
--model_path: path to the saved trained model pickle file (.pkl)
--output_path: file where the predictions will be saved
--num_entries: number of test entries to evaluate