|
254 | 254 | "\n",
|
255 | 255 | "@pm.potential\n",
|
256 | 256 | "def error(true_price=true_price, price_estimate=price_estimate):\n",
|
257 |
| - " return pm.normal_like(true_price, price_estimate, 1 / (3e3) ** 2)\n", |
| 257 | + " return pm.normal_like(true_price, price_estimate, 1 / (3e3) ** 2)\n", |
258 | 258 | "\n",
|
259 | 259 | "\n",
|
260 | 260 | "mcmc = pm.MCMC([true_price, prize_1, prize_2, price_estimate, error])\n",
|
261 | 261 | "mcmc.sample(50000, 10000)\n",
|
262 | 262 | "\n",
|
263 |
| - "price_trace = mcmc.trace(\"true_price\")[:];" |
| 263 | + "price_trace = mcmc.trace(\"true_price\")[:]" |
264 | 264 | ],
|
265 | 265 | "language": "python",
|
266 | 266 | "metadata": {},
|
|
299 | 299 | "plt.title(\"Posterior of the true price estimate\")\n",
|
300 | 300 | "plt.vlines(mu_prior, 0, 1.1 * np.max(_hist[0]), label=\"prior's mean\",\n",
|
301 | 301 | " linestyles=\"--\")\n",
|
302 |
| - "plt.vlines(price_trace.mean(), 0, 1.1 * np.max(_hist[0]), label=\"posterior's mean\", linestyles=\"-.\")\n", |
303 |
| - "plt.legend(loc=\"upper left\");" |
| 302 | + "plt.vlines(price_trace.mean(), 0, 1.1 * np.max(_hist[0]),\n", |
| 303 | + " label=\"posterior's mean\", linestyles=\"-.\")\n", |
| 304 | + "plt.legend(loc=\"upper left\")" |
304 | 305 | ],
|
305 | 306 | "language": "python",
|
306 | 307 | "metadata": {},
|
|
361 | 362 | "\n",
|
362 | 363 | "\n",
|
363 | 364 | "def showdown_loss(guess, true_price, risk=80000):\n",
|
364 |
| - " loss = np.zeros_like(true_price)\n", |
365 |
| - " ix = true_price < guess\n", |
366 |
| - " loss[~ix] = np.abs(guess - true_price[~ix])\n", |
367 |
| - " close_mask = [abs(true_price - guess) <= 250]\n", |
368 |
| - " loss[close_mask] = -2 * true_price[close_mask]\n", |
369 |
| - " loss[ix] = risk\n", |
370 |
| - " return loss\n", |
| 365 | + " loss = np.zeros_like(true_price)\n", |
| 366 | + " ix = true_price < guess\n", |
| 367 | + " loss[~ix] = np.abs(guess - true_price[~ix])\n", |
| 368 | + " close_mask = [abs(true_price - guess) <= 250]\n", |
| 369 | + " loss[close_mask] = -2 * true_price[close_mask]\n", |
| 370 | + " loss[ix] = risk\n", |
| 371 | + " return loss\n", |
| 372 | + "\n", |
371 | 373 | "\n",
|
372 | 374 | "guesses = np.linspace(5000, 50000, 70)\n",
|
373 | 375 | "risks = np.linspace(30000, 150000, 6)\n",
|
374 |
| - "expected_loss = lambda guess, risk: showdown_loss(guess, price_trace, risk).mean()\n", |
| 376 | + "expected_loss = lambda guess, risk: \\\n", |
| 377 | + " showdown_loss(guess, price_trace, risk).mean()\n", |
375 | 378 | "\n",
|
376 | 379 | "for _p in risks:\n",
|
377 | 380 | " results = [expected_loss(_g, _p) for _g in guesses]\n",
|
378 | 381 | " plt.plot(guesses, results, label=\"%d\" % _p)\n",
|
379 | 382 | "\n",
|
380 |
| - "plt.title(\"Expected loss of different guesses, \\nvarious risk-levels of overestimating\")\n", |
| 383 | + "plt.title(\"Expected loss of different guesses, \\nvarious risk-levels of \\\n", |
| 384 | + "overestimating\")\n", |
381 | 385 | "plt.legend(loc=\"upper left\", title=\"Risk parameter\")\n",
|
382 | 386 | "plt.xlabel(\"price bid\")\n",
|
383 | 387 | "plt.ylabel(\"expected loss\")\n",
|
|
420 | 424 | "\n",
|
421 | 425 | "ax = plt.subplot(111)\n",
|
422 | 426 | "\n",
|
| 427 | + "\n", |
423 | 428 | "for _p in risks:\n",
|
424 | 429 | " _color = ax._get_lines.color_cycle.next()\n",
|
425 | 430 | " _min_results = sop.fmin(expected_loss, 15000, args=(_p,), disp=False)\n",
|
426 | 431 | " _results = [expected_loss(_g, _p) for _g in guesses]\n",
|
427 | 432 | " plt.plot(guesses, _results, color=_color)\n",
|
428 |
| - " plt.scatter(_min_results, 0, s=60, color=_color, label=\"%d\" % _p)\n", |
| 433 | + " plt.scatter(_min_results, 0, s=60,\n", |
| 434 | + " color=_color, label=\"%d\" % _p)\n", |
429 | 435 | " plt.vlines(_min_results, 0, 120000, color=_color, linestyles=\"--\")\n",
|
430 | 436 | " print \"minimum at risk %d: %.2f\" % (_p, _min_results)\n",
|
431 | 437 | "\n",
|
432 |
| - "plt.title(\"Expected loss & Bayes actions of different guesses, \\n various risk-levels of overestimating\")\n", |
| 438 | + "plt.title(\"Expected loss & Bayes actions of different guesses, \\n \\\n", |
| 439 | + "various risk-levels of overestimating\")\n", |
433 | 440 | "plt.legend(loc=\"upper left\", scatterpoints=1, title=\"Bayes action at risk:\")\n",
|
434 | 441 | "plt.xlabel(\"price guess\")\n",
|
435 | 442 | "plt.ylabel(\"expected loss\")\n",
|
436 | 443 | "plt.xlim(7000, 30000)\n",
|
437 |
| - "plt.ylim(-1000, 80000);" |
| 444 | + "plt.ylim(-1000, 80000)" |
438 | 445 | ],
|
439 | 446 | "language": "python",
|
440 | 447 | "metadata": {},
|
|
575 | 582 | "\n",
|
576 | 583 | "def stock_loss(true_return, yhat, alpha=100.):\n",
|
577 | 584 | " if true_return * yhat < 0:\n",
|
578 |
| - " # opposite signs, not good\n", |
579 |
| - " return alpha * yhat ** 2 - np.sign(true_return) * yhat + abs(true_return)\n", |
| 585 | + " # opposite signs, not good\n", |
| 586 | + " return alpha * yhat ** 2 - np.sign(true_return) * yhat \\\n", |
| 587 | + " + abs(true_return)\n", |
580 | 588 | " else:\n",
|
581 | 589 | " return abs(true_return - yhat)\n",
|
582 | 590 | "\n",
|
| 591 | + "\n", |
583 | 592 | "true_value = .05\n",
|
584 | 593 | "pred = np.linspace(-.04, .12, 75)\n",
|
585 | 594 | "\n",
|
586 |
| - "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred], label=\"Loss associated with\\n prediction if true value=0.05\", lw=3)\n", |
| 595 | + "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred],\n", |
| 596 | + " label=\"Loss associated with\\n prediction if true value = 0.05\", lw=3)\n", |
587 | 597 | "plt.vlines(0, 0, .25, linestyles=\"--\")\n",
|
588 | 598 | "\n",
|
589 | 599 | "plt.xlabel(\"prediction\")\n",
|
|
592 | 602 | "plt.ylim(0, 0.25)\n",
|
593 | 603 | "\n",
|
594 | 604 | "true_value = -.02\n",
|
595 |
| - "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred], alpha=0.6, label=\"Loss associated with\\n prediction if true value=-0.02\", lw=3)\n", |
| 605 | + "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred], alpha=0.6,\n", |
| 606 | + " label=\"Loss associated with\\n prediction if true value = -0.02\", lw=3)\n", |
596 | 607 | "plt.legend()\n",
|
597 |
| - "plt.title(\"Stock returns loss if true value=0.05, -0.02\");" |
| 608 | + "plt.title(\"Stock returns loss if true value = 0.05, -0.02\");" |
598 | 609 | ],
|
599 | 610 | "language": "python",
|
600 | 611 | "metadata": {},
|
|
695 | 706 | "mcmc = pm.MCMC([obs, beta, alpha, std, prec])\n",
|
696 | 707 | "\n",
|
697 | 708 | "mcmc.sample(100000, 80000)\n",
|
698 |
| - "mcplot(mcmc);" |
| 709 | + "mcplot(mcmc)" |
699 | 710 | ],
|
700 | 711 | "language": "python",
|
701 | 712 | "metadata": {},
|
|
800 | 811 | "\n",
|
801 | 812 | "noise = 1. / np.sqrt(tau_samples) * np.random.randn(N)\n",
|
802 | 813 | "\n",
|
803 |
| - "possible_outcomes = lambda signal: alpha_samples + beta_samples * signal + noise\n", |
| 814 | + "possible_outcomes = lambda signal: alpha_samples + beta_samples * signal \\\n", |
| 815 | + " + noise\n", |
804 | 816 | "\n",
|
805 | 817 | "\n",
|
806 | 818 | "opt_predictions = np.zeros(50)\n",
|
|
894 | 906 | "from draw_sky2 import draw_sky\n",
|
895 | 907 | "\n",
|
896 | 908 | "n_sky = 3 # choose a file/sky to examine.\n",
|
897 |
| - "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/Training_Sky%d.csv\" % (n_sky),\n", |
898 |
| - " dtype=None,\n", |
899 |
| - " skip_header=1,\n", |
900 |
| - " delimiter=\",\",\n", |
901 |
| - " usecols=[1, 2, 3, 4])\n", |
| 909 | + "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/\\\n", |
| 910 | + "Training_Sky%d.csv\" % (n_sky),\n", |
| 911 | + " dtype=None,\n", |
| 912 | + " skip_header=1,\n", |
| 913 | + " delimiter=\",\",\n", |
| 914 | + " usecols=[1, 2, 3, 4])\n", |
902 | 915 | "print \"Data on galaxies in sky %d.\" % n_sky\n",
|
903 | 916 | "print \"position_x, position_y, e_1, e_2 \"\n",
|
904 | 917 | "print data[:3]\n",
|
|
952 | 965 | "\n",
|
953 | 966 | "and in PyMC, \n",
|
954 | 967 | "\n",
|
955 |
| - " exp_mass_large = pm.Uniform( \"exp_mass_large\", 40, 180)\n", |
| 968 | + " exp_mass_large = pm.Uniform(\"exp_mass_large\", 40, 180)\n", |
956 | 969 | " @pm.deterministic\n",
|
957 | 970 | " def mass_large(u = exp_mass_large):\n",
|
958 |
| - " return np.log( u )\n", |
| 971 | + " return np.log(u)\n", |
959 | 972 | "\n",
|
960 | 973 | "(This is what we mean when we say *log*-uniform.) For smaller galaxies, Tim set the mass to be the logarithm of 20. Why did Tim not create a prior for the smaller mass, nor treat it as a unknown? I believe this decision was made to speed up convergence of the algorithm. This is not too restrictive, as by construction the smaller halos have less influence on the galaxies.\n",
|
961 | 974 | "\n",
|
|
1013 | 1026 | "\n",
|
1014 | 1027 | "@pm.deterministic\n",
|
1015 | 1028 | "def mean(mass=mass_large, h_pos=halo_position, glx_pos=data[:, :2]):\n",
|
1016 |
| - " return mass / f_distance(glx_pos, h_pos, 240) * tangential_distance(glx_pos, h_pos);" |
| 1029 | + " return mass / f_distance(glx_pos, h_pos, 240) *\\\n", |
| 1030 | + " tangential_distance(glx_pos, h_pos)" |
1017 | 1031 | ],
|
1018 | 1032 | "language": "python",
|
1019 | 1033 | "metadata": {},
|
|
1029 | 1043 | "mcmc = pm.MCMC([ellpty, mean, halo_position, mass_large])\n",
|
1030 | 1044 | "map_ = pm.MAP([ellpty, mean, halo_position, mass_large])\n",
|
1031 | 1045 | "map_.fit()\n",
|
1032 |
| - "mcmc.sample(200000, 140000, 3);" |
| 1046 | + "mcmc.sample(200000, 140000, 3)" |
1033 | 1047 | ],
|
1034 | 1048 | "language": "python",
|
1035 | 1049 | "metadata": {},
|
|
1101 | 1115 | " delimiter=\",\",\n",
|
1102 | 1116 | " usecols=[1, 2, 3, 4, 5, 6, 7, 8, 9],\n",
|
1103 | 1117 | " skip_header=1)\n",
|
1104 |
| - "print halo_data[n_sky];" |
| 1118 | + "print halo_data[n_sky]" |
1105 | 1119 | ],
|
1106 | 1120 | "language": "python",
|
1107 | 1121 | "metadata": {},
|
|
1139 | 1153 | " c=\"k\", s=70)\n",
|
1140 | 1154 | "plt.legend(scatterpoints=1, loc=\"lower left\")\n",
|
1141 | 1155 | "plt.xlim(0, 4200)\n",
|
1142 |
| - "plt.ylim(0, 4200)\n", |
| 1156 | + "plt.ylim(0, 4200);\n", |
1143 | 1157 | "\n",
|
1144 |
| - "print \"True halo location:\", halo_data[n_sky][3], halo_data[n_sky][4];" |
| 1158 | + "print \"True halo location:\", halo_data[n_sky][3], halo_data[n_sky][4]" |
1145 | 1159 | ],
|
1146 | 1160 | "language": "python",
|
1147 | 1161 | "metadata": {},
|
|
1173 | 1187 | "collapsed": false,
|
1174 | 1188 | "input": [
|
1175 | 1189 | "mean_posterior = t.mean(axis=0).reshape(1, 2)\n",
|
1176 |
| - "print mean_posterior;" |
| 1190 | + "print mean_posterior" |
1177 | 1191 | ],
|
1178 | 1192 | "language": "python",
|
1179 | 1193 | "metadata": {},
|
|
1204 | 1218 | "sky_prediction = mean_posterior\n",
|
1205 | 1219 | "\n",
|
1206 | 1220 | "print \"Using the mean:\"\n",
|
1207 |
| - "main_score(nhalo_all, x_true_all, y_true_all, x_ref_all, y_ref_all, sky_prediction)\n", |
| 1221 | + "main_score(nhalo_all, x_true_all, y_true_all,\n", |
| 1222 | + " x_ref_all, y_ref_all, sky_prediction)\n", |
1208 | 1223 | "\n",
|
1209 | 1224 | "# what's a bad score?\n",
|
1210 | 1225 | "print\n",
|
1211 | 1226 | "random_guess = np.random.randint(0, 4200, size=(1, 2))\n",
|
1212 | 1227 | "print \"Using a random location:\", random_guess\n",
|
1213 |
| - "main_score(nhalo_all, x_true_all, y_true_all, x_ref_all, y_ref_all, random_guess)\n", |
1214 |
| - "print;" |
| 1228 | + "main_score(nhalo_all, x_true_all, y_true_all,\n", |
| 1229 | + " x_ref_all, y_ref_all, random_guess)\n", |
| 1230 | + "print" |
1215 | 1231 | ],
|
1216 | 1232 | "language": "python",
|
1217 | 1233 | "metadata": {},
|
|
1298 | 1314 | "\n",
|
1299 | 1315 | " _sum = 0\n",
|
1300 | 1316 | " for i in range(n_halos_in_sky):\n",
|
1301 |
| - " _sum += mass[i] / f_distance(glx_pos, h_pos[i, :], fdist_constants[i]) * tangential_distance(glx_pos, h_pos[i, :])\n", |
| 1317 | + " _sum += mass[i] / f_distance(glx_pos, h_pos[i, :], fdist_constants[i]) *\\\n", |
| 1318 | + " tangential_distance(glx_pos, h_pos[i, :])\n", |
1302 | 1319 | "\n",
|
1303 | 1320 | " return _sum\n",
|
1304 | 1321 | "\n",
|
|
1310 | 1327 | "\n",
|
1311 | 1328 | " mcmc = pm.MCMC([ellpty, mean, halo_positions, mass_large])\n",
|
1312 | 1329 | " mcmc.sample(samples, burn_in, thin)\n",
|
1313 |
| - " return mcmc.trace(\"halo_positions\")[:];" |
| 1330 | + " return mcmc.trace(\"halo_positions\")[:]" |
1314 | 1331 | ],
|
1315 | 1332 | "language": "python",
|
1316 | 1333 | "metadata": {},
|
|
1322 | 1339 | "collapsed": false,
|
1323 | 1340 | "input": [
|
1324 | 1341 | "n_sky = 215\n",
|
1325 |
| - "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/Training_Sky%d.csv\" % (n_sky),\n", |
1326 |
| - " dtype=None,\n", |
1327 |
| - " skip_header=1,\n", |
1328 |
| - " delimiter=\",\",\n", |
1329 |
| - " usecols=[1, 2, 3, 4]);" |
| 1342 | + "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/\\\n", |
| 1343 | + "Training_Sky%d.csv\" % (n_sky),\n", |
| 1344 | + " dtype=None,\n", |
| 1345 | + " skip_header=1,\n", |
| 1346 | + " delimiter=\",\",\n", |
| 1347 | + " usecols=[1, 2, 3, 4])" |
1330 | 1348 | ],
|
1331 | 1349 | "language": "python",
|
1332 | 1350 | "metadata": {},
|
|
1341 | 1359 | "samples = 10.5e5\n",
|
1342 | 1360 | "traces = halo_posteriors(3, data, samples=samples,\n",
|
1343 | 1361 | " burn_in=9.5e5,\n",
|
1344 |
| - " thin=10);" |
| 1362 | + " thin=10)" |
1345 | 1363 | ],
|
1346 | 1364 | "language": "python",
|
1347 | 1365 | "metadata": {},
|
|
1368 | 1386 | "cell_type": "code",
|
1369 | 1387 | "collapsed": false,
|
1370 | 1388 | "input": [
|
| 1389 | + "\n", |
1371 | 1390 | "fig = draw_sky(data)\n",
|
1372 | 1391 | "plt.title(\"Galaxy positions and ellipcities of sky %d.\" % n_sky)\n",
|
1373 | 1392 | "plt.xlabel(\"x-position\")\n",
|
|
1384 | 1403 | " label=\"True halo position\",\n",
|
1385 | 1404 | " c=\"k\", s=90)\n",
|
1386 | 1405 | "\n",
|
1387 |
| - "# plt.legend(scatterpoints=1)\n", |
| 1406 | + "# plt.legend(scatterpoints = 1)\n", |
1388 | 1407 | "plt.xlim(0, 4200)\n",
|
1389 | 1408 | "plt.ylim(0, 4200);"
|
1390 | 1409 | ],
|
|
1424 | 1443 | "mean_posterior = traces.mean(axis=0).reshape(1, 4)\n",
|
1425 | 1444 | "print mean_posterior\n",
|
1426 | 1445 | "\n",
|
| 1446 | + "\n", |
1427 | 1447 | "nhalo_all = _halo_data[0].reshape(1, 1)\n",
|
1428 | 1448 | "x_true_all = _halo_data[3].reshape(1, 1)\n",
|
1429 | 1449 | "y_true_all = _halo_data[4].reshape(1, 1)\n",
|
1430 | 1450 | "x_ref_all = _halo_data[1].reshape(1, 1)\n",
|
1431 | 1451 | "y_ref_all = _halo_data[2].reshape(1, 1)\n",
|
1432 | 1452 | "sky_prediction = mean_posterior\n",
|
1433 | 1453 | "\n",
|
| 1454 | + "\n", |
1434 | 1455 | "print \"Using the mean:\"\n",
|
1435 |
| - "main_score([1], x_true_all, y_true_all, x_ref_all, y_ref_all, sky_prediction)\n", |
| 1456 | + "main_score([1], x_true_all, y_true_all,\n", |
| 1457 | + " x_ref_all, y_ref_all, sky_prediction)\n", |
1436 | 1458 | "\n",
|
1437 | 1459 | "# what's a bad score?\n",
|
1438 | 1460 | "print\n",
|
1439 | 1461 | "random_guess = np.random.randint(0, 4200, size=(1, 2))\n",
|
1440 | 1462 | "print \"Using a random location:\", random_guess\n",
|
1441 |
| - "main_score([1], x_true_all, y_true_all, x_ref_all, y_ref_all, random_guess)\n", |
1442 |
| - "print;" |
| 1463 | + "main_score([1], x_true_all, y_true_all,\n", |
| 1464 | + " x_ref_all, y_ref_all, random_guess)\n", |
| 1465 | + "print" |
1443 | 1466 | ],
|
1444 | 1467 | "language": "python",
|
1445 | 1468 | "metadata": {},
|
|
1519 | 1542 | "def css_styling():\n",
|
1520 | 1543 | " styles = open(\"../styles/custom.css\", \"r\").read()\n",
|
1521 | 1544 | " return HTML(styles)\n",
|
1522 |
| - "css_styling();" |
| 1545 | + "css_styling()" |
1523 | 1546 | ],
|
1524 | 1547 | "language": "python",
|
1525 | 1548 | "metadata": {},
|
|
0 commit comments