TheAlgorithms · tianyizheng02 · Aug 14, 2023 · Aug 14, 2023 · Aug 14, 2023 · Aug 14, 2023
diff --git a/machine_learning/forecasting/ex_data.csv b/machine_learning/forecasting/ex_data.csv
@@ -1,4 +1,4 @@
-total_user,total_events,days
+total_users,total_events,days
 18231,0.0,1
 22621,1.0,2
 15675,0.0,3

diff --git a/machine_learning/forecasting/run.py b/machine_learning/forecasting/run.py
@@ -1,6 +1,6 @@
 """
 this is code for forecasting
-but i modified it and used it for safety checker of data
+but I modified it and used it as a safety checker for data
 for ex: you have an online shop and for some reason some data are
 missing (the amount of data that u expected are not supposed to be)
         then we can use it
@@ -102,6 +102,10 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
     """
     safe = 0
     not_safe = 0
+
+    if not isinstance(actual_result, float):
+        raise TypeError("Actual result should be float. Value passed is a list")
+
     for i in list_vote:
         if i > actual_result:
             safe = not_safe + 1
@@ -114,11 +118,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
 
 
 if __name__ == "__main__":
-    # data_input_df = pd.read_csv("ex_data.csv", header=None)
-    data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
-    data_input_df = pd.DataFrame(
-        data_input, columns=["total_user", "total_even", "days"]
-    )
+    data_input_df = pd.read_csv("ex_data.csv")
+    # data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
+    # data_input_df = pd.DataFrame(
+    #     data_input, columns=["total_user", "total_even", "days"]
+    # )
 
     """
     data column = total user in a day, how much online event held in one day,
@@ -138,23 +142,23 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
     x_test = x[len(x) - 1 :]
 
     # for linear regression & sarimax
-    trn_date = total_date[: len(total_date) - 1]
-    trn_user = total_user[: len(total_user) - 1]
-    trn_match = total_match[: len(total_match) - 1]
+    train_date = total_date[: len(total_date) - 1]
+    train_user = total_user[: len(total_user) - 1]
+    train_match = total_match[: len(total_match) - 1]
 
-    tst_date = total_date[len(total_date) - 1 :]
-    tst_user = total_user[len(total_user) - 1 :]
-    tst_match = total_match[len(total_match) - 1 :]
+    test_date = total_date[len(total_date) - 1 :]
+    test_user = total_user[len(total_user) - 1 :]
+    test_match = total_match[len(total_match) - 1 :]
 
     # voting system with forecasting
     res_vote = [
         linear_regression_prediction(
-            trn_date, trn_user, trn_match, tst_date, tst_match
+            train_date, train_user, train_match, test_date, test_match
         ),
-        sarimax_predictor(trn_user, trn_match, tst_match),
-        support_vector_regressor(x_train, x_test, trn_user),
+        sarimax_predictor(train_user, train_match, test_match),
+        support_vector_regressor(x_train, x_test, train_user),
     ]
 
     # check the safety of today's data
-    not_str = "" if data_safety_checker(res_vote, tst_user) else "not "
-    print("Today's data is {not_str}safe.")
+    not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
+    print(f"Today's data is {not_str}safe.")