11"""
22this is code for forecasting
33but i modified it and used it for safety checker of data
4- for ex: you have a online shop and for some reason some data are
4+ for ex: you have an online shop and for some reason some data are
55missing (the amount of data is not what you expected)
66 then we can use it
77*ps : 1. ofc we can use normal statistic method but in this case
@@ -91,14 +91,14 @@ def interquartile_range_checker(train_user: list) -> float:
9191 return low_lim
9292
9393
94- def data_safety_checker (list_vote : list , actual_result : float ) -> None :
94+ def data_safety_checker (list_vote : list , actual_result : float ) -> bool :
9595 """
9696 Used to review all the votes (list result prediction)
9797 and compare it to the actual result.
9898 input : list of predictions
9999 output : True if the data is safe, False otherwise
100- >>> data_safety_checker([2,3, 4],5.0)
101- Today's data is not safe.
100+ >>> data_safety_checker([2, 3, 4], 5.0)
101+ False
102102 """
103103 safe = 0
104104 not_safe = 0
@@ -107,50 +107,54 @@ def data_safety_checker(list_vote: list, actual_result: float) -> None:
107107 safe = not_safe + 1
108108 else :
109109 if abs (abs (i ) - abs (actual_result )) <= 0.1 :
110- safe = safe + 1
110+ safe += 1
111111 else :
112- not_safe = not_safe + 1
113- print ( f"Today's data is { 'not ' if safe <= not_safe else '' } safe." )
112+ not_safe += 1
113+ return safe > not_safe
114114
115115
116- # data_input_df = pd.read_csv("ex_data.csv", header=None)
117- data_input = [[18231 , 0.0 , 1 ], [22621 , 1.0 , 2 ], [15675 , 0.0 , 3 ], [23583 , 1.0 , 4 ]]
118- data_input_df = pd .DataFrame (data_input , columns = ["total_user" , "total_even" , "days" ])
116+ if __name__ == "__main__" :
117+ # data_input_df = pd.read_csv("ex_data.csv", header=None)
118+ data_input = [[18231 , 0.0 , 1 ], [22621 , 1.0 , 2 ], [15675 , 0.0 , 3 ], [23583 , 1.0 , 4 ]]
119+ data_input_df = pd .DataFrame (
120+ data_input , columns = ["total_user" , "total_even" , "days" ]
121+ )
119122
120- """
121- data column = total user in a day, how much online event held in one day,
122- what day is that(sunday-saturday)
123- """
123+ """
124+ data column = total user in a day, how much online event held in one day,
125+ what day is that(sunday-saturday)
126+ """
124127
125- # start normalization
126- normalize_df = Normalizer ().fit_transform (data_input_df .values )
127- # split data
128- total_date = normalize_df [:, 2 ].tolist ()
129- total_user = normalize_df [:, 0 ].tolist ()
130- total_match = normalize_df [:, 1 ].tolist ()
131-
132- # for svr (input variable = total date and total match)
133- x = normalize_df [:, [1 , 2 ]].tolist ()
134- x_train = x [: len (x ) - 1 ]
135- x_test = x [len (x ) - 1 :]
136-
137- # for linear reression & sarimax
138- trn_date = total_date [: len (total_date ) - 1 ]
139- trn_user = total_user [: len (total_user ) - 1 ]
140- trn_match = total_match [: len (total_match ) - 1 ]
141-
142- tst_date = total_date [len (total_date ) - 1 :]
143- tst_user = total_user [len (total_user ) - 1 :]
144- tst_match = total_match [len (total_match ) - 1 :]
145-
146-
147- # voting system with forecasting
148- res_vote = []
149- res_vote .append (
150- linear_regression_prediction (trn_date , trn_user , trn_match , tst_date , tst_match )
151- )
152- res_vote .append (sarimax_predictor (trn_user , trn_match , tst_match ))
153- res_vote .append (support_vector_regressor (x_train , x_test , trn_user ))
154-
155- # check the safety of todays'data^^
156- data_safety_checker (res_vote , tst_user )
128+ # start normalization
129+ normalize_df = Normalizer ().fit_transform (data_input_df .values )
130+ # split data
131+ total_date = normalize_df [:, 2 ].tolist ()
132+ total_user = normalize_df [:, 0 ].tolist ()
133+ total_match = normalize_df [:, 1 ].tolist ()
134+
135+ # for svr (input variable = total date and total match)
136+ x = normalize_df [:, [1 , 2 ]].tolist ()
137+ x_train = x [: len (x ) - 1 ]
138+ x_test = x [len (x ) - 1 :]
139+
140+ # for linear regression & sarimax
141+ trn_date = total_date [: len (total_date ) - 1 ]
142+ trn_user = total_user [: len (total_user ) - 1 ]
143+ trn_match = total_match [: len (total_match ) - 1 ]
144+
145+ tst_date = total_date [len (total_date ) - 1 :]
146+ tst_user = total_user [len (total_user ) - 1 :]
147+ tst_match = total_match [len (total_match ) - 1 :]
148+
149+ # voting system with forecasting
150+ res_vote = [
151+ linear_regression_prediction (
152+ trn_date , trn_user , trn_match , tst_date , tst_match
153+ ),
154+ sarimax_predictor (trn_user , trn_match , tst_match ),
155+ support_vector_regressor (x_train , x_test , trn_user ),
156+ ]
157+
158+ # check the safety of today's data
159+ not_str = "" if data_safety_checker (res_vote , tst_user ) else "not "
160+ print (f"Today's data is {not_str}safe." )
0 commit comments