Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion machine_learning/forecasting/ex_data.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
total_user,total_events,days
total_users,total_events,days
18231,0.0,1
22621,1.0,2
15675,0.0,3
Expand Down
38 changes: 21 additions & 17 deletions machine_learning/forecasting/run.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
this is code for forecasting
but i modified it and used it for safety checker of data
but I modified it and used it for safety checker of data
for example: you have an online shop and for some reason some data are
missing (the amount of data is not what you expected to receive),
then we can use it
Expand Down Expand Up @@ -102,6 +102,10 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
"""
safe = 0
not_safe = 0

if not isinstance(actual_result, float):
raise TypeError("Actual result should be float. Value passed is a list")

for i in list_vote:
if i > actual_result:
safe = not_safe + 1
Expand All @@ -114,11 +118,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:


if __name__ == "__main__":
# data_input_df = pd.read_csv("ex_data.csv", header=None)
data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
data_input_df = pd.DataFrame(
data_input, columns=["total_user", "total_even", "days"]
)
data_input_df = pd.read_csv("ex_data.csv")
# data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
# data_input_df = pd.DataFrame(
# data_input, columns=["total_user", "total_even", "days"]
# )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
# data_input_df = pd.DataFrame(
# data_input, columns=["total_user", "total_even", "days"]
# )

Let's just delete the old code rather than commenting it out


"""
data columns = total users in a day, how many online events were held in one day,
Expand All @@ -138,23 +142,23 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
x_test = x[len(x) - 1 :]

# for linear regression & sarimax
trn_date = total_date[: len(total_date) - 1]
trn_user = total_user[: len(total_user) - 1]
trn_match = total_match[: len(total_match) - 1]
train_date = total_date[: len(total_date) - 1]
train_user = total_user[: len(total_user) - 1]
train_match = total_match[: len(total_match) - 1]

tst_date = total_date[len(total_date) - 1 :]
tst_user = total_user[len(total_user) - 1 :]
tst_match = total_match[len(total_match) - 1 :]
test_date = total_date[len(total_date) - 1 :]
test_user = total_user[len(total_user) - 1 :]
test_match = total_match[len(total_match) - 1 :]

# voting system with forecasting
res_vote = [
linear_regression_prediction(
trn_date, trn_user, trn_match, tst_date, tst_match
train_date, train_user, train_match, test_date, test_match
),
sarimax_predictor(trn_user, trn_match, tst_match),
support_vector_regressor(x_train, x_test, trn_user),
sarimax_predictor(train_user, train_match, test_match),
support_vector_regressor(x_train, x_test, train_user),
]

# check the safety of today's data
not_str = "" if data_safety_checker(res_vote, tst_user) else "not "
print("Today's data is {not_str}safe.")
not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
print(f"Today's data is {not_str}safe.")