| 
82 | 82 |         "from sklearn import svm\n",  | 
83 | 83 |         "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",  | 
84 | 84 |         "from sklearn.linear_model import LogisticRegression\n",  | 
85 |  | -        "import pandas as pd\n",  | 
86 |  | -        "import shap"  | 
 | 85 | +        "import pandas as pd"  | 
87 | 86 |       ]  | 
88 | 87 |     },  | 
89 | 88 |     {  | 
 | 
99 | 98 |       "metadata": {},  | 
100 | 99 |       "outputs": [],  | 
101 | 100 |       "source": [  | 
102 |  | -        "X_raw, Y = shap.datasets.adult()\n",  | 
103 |  | -        "X_raw[\"Race\"].value_counts().to_dict()"  | 
 | 101 | +        "from sklearn.datasets import fetch_openml\n",  | 
 | 102 | +        "data = fetch_openml(data_id=1590, as_frame=True)\n",  | 
 | 103 | +        "X_raw = data.data\n",  | 
 | 104 | +        "Y = (data.target == '>50K') * 1\n",  | 
 | 105 | +        "\n",  | 
 | 106 | +        "X_raw[\"race\"].value_counts().to_dict()"  | 
104 | 107 |       ]  | 
105 | 108 |     },  | 
106 | 109 |     {  | 
 | 
116 | 119 |       "metadata": {},  | 
117 | 120 |       "outputs": [],  | 
118 | 121 |       "source": [  | 
119 |  | -        "A = X_raw[['Sex','Race']]\n",  | 
120 |  | -        "X = X_raw.drop(labels=['Sex', 'Race'],axis = 1)\n",  | 
121 |  | -        "X = pd.get_dummies(X)\n",  | 
 | 122 | +        "A = X_raw[['sex','race']]\n",  | 
 | 123 | +        "X = X_raw.drop(labels=['sex', 'race'],axis = 1)\n",  | 
 | 124 | +        "X_dummies = pd.get_dummies(X)\n",  | 
 | 125 | +        "\n",  | 
 | 126 | +        "sc = StandardScaler()\n",  | 
 | 127 | +        "X_scaled = sc.fit_transform(X_dummies)\n",  | 
 | 128 | +        "X_scaled = pd.DataFrame(X_scaled, columns=X_dummies.columns)\n",  | 
122 | 129 |         "\n",  | 
123 | 130 |         "\n",  | 
124 | 131 |         "le = LabelEncoder()\n",  | 
 | 
139 | 146 |       "outputs": [],  | 
140 | 147 |       "source": [  | 
141 | 148 |         "from sklearn.model_selection import train_test_split\n",  | 
142 |  | -        "X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split(X_raw, \n",  | 
 | 149 | +        "X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split(X_scaled, \n",  | 
143 | 150 |         "                                                    Y, \n",  | 
144 | 151 |         "                                                    A,\n",  | 
145 | 152 |         "                                                    test_size = 0.2,\n",  | 
 | 
150 | 157 |         "X_train = X_train.reset_index(drop=True)\n",  | 
151 | 158 |         "A_train = A_train.reset_index(drop=True)\n",  | 
152 | 159 |         "X_test = X_test.reset_index(drop=True)\n",  | 
153 |  | -        "A_test = A_test.reset_index(drop=True)\n",  | 
154 |  | -        "\n",  | 
155 |  | -        "# Improve labels\n",  | 
156 |  | -        "A_test.Sex.loc[(A_test['Sex'] == 0)] = 'female'\n",  | 
157 |  | -        "A_test.Sex.loc[(A_test['Sex'] == 1)] = 'male'\n",  | 
158 |  | -        "\n",  | 
159 |  | -        "\n",  | 
160 |  | -        "A_test.Race.loc[(A_test['Race'] == 0)] = 'Amer-Indian-Eskimo'\n",  | 
161 |  | -        "A_test.Race.loc[(A_test['Race'] == 1)] = 'Asian-Pac-Islander'\n",  | 
162 |  | -        "A_test.Race.loc[(A_test['Race'] == 2)] = 'Black'\n",  | 
163 |  | -        "A_test.Race.loc[(A_test['Race'] == 3)] = 'Other'\n",  | 
164 |  | -        "A_test.Race.loc[(A_test['Race'] == 4)] = 'White'"  | 
 | 160 | +        "A_test = A_test.reset_index(drop=True)"  | 
165 | 161 |       ]  | 
166 | 162 |     },  | 
167 | 163 |     {  | 
 | 
251 | 247 |       "outputs": [],  | 
252 | 248 |       "source": [  | 
253 | 249 |         "sweep.fit(X_train, Y_train,\n",  | 
254 |  | -        "          sensitive_features=A_train.Sex)\n",  | 
 | 250 | +        "          sensitive_features=A_train.sex)\n",  | 
255 | 251 |         "\n",  | 
256 | 252 |         "predictors = sweep._predictors"  | 
257 | 253 |       ]  | 
 | 
274 | 270 |         "    classifier = lambda X: m.predict(X)\n",  | 
275 | 271 |         "    \n",  | 
276 | 272 |         "    error = ErrorRate()\n",  | 
277 |  | -        "    error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train.Sex)\n",  | 
 | 273 | +        "    error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train.sex)\n",  | 
278 | 274 |         "    disparity = DemographicParity()\n",  | 
279 |  | -        "    disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train.Sex)\n",  | 
 | 275 | +        "    disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train.sex)\n",  | 
280 | 276 |         "    \n",  | 
281 | 277 |         "    errors.append(error.gamma(classifier)[0])\n",  | 
282 | 278 |         "    disparities.append(disparity.gamma(classifier).max())\n",  | 
 | 
440 | 436 |       "metadata": {},  | 
441 | 437 |       "outputs": [],  | 
442 | 438 |       "source": [  | 
443 |  | -        "sf = { 'sex': A_test.Sex, 'race': A_test.Race }\n",  | 
 | 439 | +        "sf = { 'sex': A_test.sex, 'race': A_test.race }\n",  | 
444 | 440 |         "\n",  | 
445 | 441 |         "from fairlearn.metrics._group_metric_set import _create_group_metric_set\n",  | 
446 | 442 |         "\n",  | 
 | 
0 commit comments