|
119 | 119 | \vspace{1cm} |
120 | 120 | highly recommended textbooks that I like very much |
121 | 121 | \begin{itemize} |
122 | | - \item Kevin P. Murphy (2022): "Probabilistic Machine Learning: An Introduction", MIT Press, 1st. ed. |
| 122 | + \item K. P. Murphy (2022): "Probabilistic Machine Learning: An Introduction", MIT Press, 1st ed. |
123 | 123 | \href{https://probml.github.io/pml-book/book1.html}{current draft as free pdf} |
124 | | - \item \href{https://math.mit.edu/~gs/}{Gilbert Strang} (2019): "Linear Algebra and Learning from Data", Wellesley, 1st ed. |
| 124 | + \item \href{https://math.mit.edu/~gs/}{G. Strang} (2019): "Linear Algebra and Learning from Data", Wellesley, 1st ed. |
125 | 125 | \end{itemize} |
126 | 126 | \end{frame} |
127 | 127 |
|
128 | 128 | \begin{frame}{Literature} |
129 | 129 | theory textbooks that inspire me a lot (roughly ordered by how heavily I use them) |
130 | 130 | \begin{itemize} |
131 | 131 | \item C.C. Aggarwal, Neural Networks and Deep Learning. Springer, 2018. |
132 | | - \item Christopher M. Bishop, Pattern Recognition and Machine Learning, Springer, 2006 |
| 132 | + \item C. M. Bishop, Pattern Recognition and Machine Learning, Springer, 2006 |
133 | 133 | \item S. Theodoridis, Machine Learning, 2nd ed. Academic Press, 2020. |
134 | 134 | \href{https://www.sciencedirect.com/book/9780128188033/machine-learning}{free ebook} |
135 | 135 | \item I. Goodfellow, Y. Bengio, and A. Courville, Deep Learning. MIT Press, 2016. |
136 | 136 | \item T. Hastie, R. Tibshirani, and J. Friedman, The Elements of Statistical Learning, 2nd ed. Springer, 2009. |
137 | 137 | \href{https://hastie.su.domains/ElemStatLearn/}{free ebook} |
138 | 138 | \item G. James, D. Witten, T. Hastie, and R. Tibshirani, An Introduction to Statistical Learning with Applications in R, 2nd ed. Springer, 2021. \href{https://www.statlearning.com/}{free ebook} |
139 | 139 | \item C.C. Aggarwal, Linear Algebra and Optimization for Machine Learning. Springer, 2020. \href{https://link.springer.com/book/10.1007/978-3-030-40344-7}{free ebook} |
140 | | - \item Marc P. Deisenroth, A. Aldo Faisal, Cheng S. Ong, Mathematics for Machine Learning, Cambridge, 2020. \href{https://mml-book.github.io/book/mml-book.pdf}{free ebook} |
141 | | - \item Steven L. Brunton, J. Nathan Kutz, Data Driven Science \& Engineering, Cambridge, 2019. \href{http://www.databookuw.com/databook.pdf}{free ebook draft} |
| 140 | + \item M. P. Deisenroth, A. A. Faisal, C. S. Ong, Mathematics for Machine Learning, Cambridge, 2020. \href{https://mml-book.github.io/book/mml-book.pdf}{free ebook} |
| 141 | + \item S. L. Brunton, J. N. Kutz, Data-Driven Science \& Engineering, Cambridge, 2019. \href{http://www.databookuw.com/databook.pdf}{free ebook draft} |
142 | 142 | \end{itemize} |
143 | 143 | \end{frame} |
144 | 144 |
|
|
150 | 150 | \href{http://www.databookuw.com/databook.pdf}{free ebook draft}, |
151 | 151 | \href{http://www.databookuw.com/}{video lectures}, |
152 | 152 | \href{https://github.com/dylewsky/Data_Driven_Science_Python_Demos}{Python tutorials} |
153 | | - \item A. G\'{e}ron, Hands-On Machine Learning with SciKit \& TensorFlow, 1st/2nd ed. O'Reilly, 2017/2019. |
| 153 | + \item Aur\'{e}lien G\'{e}ron, Hands-On Machine Learning with Scikit-Learn \& TensorFlow, 1st/2nd ed. O'Reilly, 2017/2019. |
154 | 154 | \href{https://github.com/ageron/handson-ml2}{Python tutorials} |
155 | 155 | \item \href{https://playground.tensorflow.org}{A Neural Network Playground---TensorFlow} |
156 | 156 | \item courses by Andrew Ng at \url{https://www.deeplearning.ai/} and/or \url{https://www.coursera.org/} |
|
165 | 165 |
|
166 | 166 | textbooks that I like very much |
167 | 167 | \begin{itemize} |
168 | | - \item John F. Monahan, A Primer on Linear Models, CRC Press, 2008. |
| 168 | + \item J. F. Monahan, A Primer on Linear Models, CRC Press, 2008. |
169 | 169 | \item L. Fahrmeir, A. Hamerle, and G. Tutz, Multivariate statistische Verfahren, 2nd ed. de Gruyter, 1996. |
170 | 170 | \href{https://www.degruyter.com/document/doi/10.1515/9783110816020/html}{free ebook} |
171 | 171 | \item L. Fahrmeir, T. Kneib, S. Lang, and B. D. Marx, Regression, 2nd ed. Springer, 2021. |
@@ -271,8 +271,7 @@ \subsection{Exercise 02} |
271 | 271 | \item recap important matrix factorizations |
272 | 272 | \item recap eigenvalues/eigenvectors |
273 | 273 | \item spectral theorem |
274 | | -\item SVD and 4 subspaces within orthonormal bases $\bm{V}$, $\bm{U}$ |
275 | | -\item rank-1 matrix superposition |
| 274 | +\item SVD as a fundamental matrix factorization (NumPy sketch on the next slide) |
276 | 275 | \end{itemize} |
277 | 276 | \end{frame} |
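
\begin{frame}[fragile]{Ex02: Eigendecomposition vs. SVD (NumPy Sketch)}
A minimal sketch, not part of the exercise code: the random matrix and seed below are made-up toy values and only NumPy is assumed. It illustrates the spectral theorem for a symmetric matrix and the link between its eigenvalues and the singular values of $\bm{A}$.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 3))       # generic rectangular matrix
S = A.T @ A                           # symmetric (Gram) matrix

# spectral theorem: real eigenvalues, orthonormal eigenvectors
w, Q = np.linalg.eigh(S)
print(np.allclose(Q @ np.diag(w) @ Q.T, S))

# SVD: singular values are the square roots of eig(A^T A)
U, s, Vh = np.linalg.svd(A, full_matrices=False)
print(np.allclose(np.sort(s**2), np.sort(w)))
\end{verbatim}
\end{frame}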
278 | 277 |
|
@@ -585,7 +584,10 @@ \subsection{Exercise 03} |
585 | 584 | \begin{frame}{Ex03: SVD and the 4 Matrix Subspaces} |
586 | 585 | Objectives |
587 | 586 | \begin{itemize} |
588 | | -\item TBD |
| 587 | +\item a matrix spans four fundamental subspaces |
| 588 | +\item the SVD explains these 4 subspaces with the orthonormal bases $\bm{V}$, $\bm{U}$ |
| 589 | +\item singular values tell us the `gain' from input to output singular vectors |
| 590 | +\item rank-1 matrix superposition (NumPy sketch on the next slide) |
589 | 591 | \end{itemize} |
590 | 592 | \end{frame} |
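
\begin{frame}[fragile]{Ex03: SVD and the 4 Subspaces (NumPy Sketch)}
A minimal sketch, not taken from the exercise: the rank-2 toy matrix, seed and rank tolerance are made up and only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)
A = rng.standard_normal((4, 2)) @ rng.standard_normal((2, 3))  # rank 2

U, s, Vh = np.linalg.svd(A)
r = np.sum(s > 1e-10)                       # numerical rank
col_space, left_null = U[:, :r], U[:, r:]
row_space, null_space = Vh[:r, :].T, Vh[r:, :].T

print(np.allclose(A @ null_space, 0))       # null space is mapped to 0
print(np.allclose(left_null.T @ A, 0))      # left null space kills A

# rank-1 superposition: A = sum_i s_i u_i v_i^T
A_rebuilt = sum(s[i] * np.outer(U[:, i], Vh[i, :]) for i in range(r))
print(np.allclose(A, A_rebuilt))
\end{verbatim}
\end{frame}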
591 | 593 |
|
@@ -1563,7 +1565,9 @@ \subsection{Exercise 04} |
1563 | 1565 | \begin{frame}{Ex04: Solving an Inverse Problem == Finding Model Parameters / Projection Matrices} |
1564 | 1566 | Objectives |
1565 | 1567 | \begin{itemize} |
1566 | | -\item TBD |
| 1568 | +\item solving a linear inverse problem is actually machine learning |
| 1569 | +\item the left inverse solves the least squares error problem |
| 1570 | +\item projection matrices nicely explain the inverse solution (NumPy sketch on the next slide) |
1567 | 1571 | \end{itemize} |
1568 | 1572 | \end{frame} |
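
\begin{frame}[fragile]{Ex04: Left Inverse and Projection Matrix (NumPy Sketch)}
A minimal sketch, not the exercise code: the tall design matrix, true parameters and noise level are made-up values and only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(2)
X = rng.standard_normal((50, 3))          # tall matrix, full column rank
theta_true = np.array([1.0, -2.0, 0.5])
y = X @ theta_true + 0.1 * rng.standard_normal(50)

# left inverse (normal equations) solves the least squares problem
X_left = np.linalg.inv(X.T @ X) @ X.T
theta_hat = X_left @ y
print(np.allclose(theta_hat, np.linalg.lstsq(X, y, rcond=None)[0]))

# projection matrix onto the column space of X
P = X @ X_left
print(np.allclose(P @ P, P))              # idempotent
print(np.allclose(P.T, P))                # symmetric
print(np.allclose(X @ theta_hat, P @ y))  # fitted values = projected y
\end{verbatim}
\end{frame}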
1569 | 1573 |
|
@@ -2229,7 +2233,11 @@ \subsection{Exercise 05} |
2229 | 2233 | \begin{frame}{Ex05: Condition Number / Regularization} |
2230 | 2234 | Objectives |
2231 | 2235 | \begin{itemize} |
2232 | | -\item TBD |
| 2236 | +\item concept of the condition number in terms of singular values |
| 2237 | +\item impact / problems of a matrix with a high condition number |
| 2238 | +\item how to handle small singular values |
| 2239 | +\item ridge regression as a simple regularization method |
| 2240 | +\item L-curve concept to find the optimum regularization amount (NumPy sketch on the next slide) |
2233 | 2241 | \end{itemize} |
2234 | 2242 | \end{frame} |
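
\begin{frame}[fragile]{Ex05: Condition Number and Ridge Regression (NumPy Sketch)}
A minimal sketch with made-up singular values, noise level and regularization weights (not the exercise data), assuming only NumPy; the printed pairs of residual norm and solution norm are the raw material for an L-curve plot.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(3)
U, _ = np.linalg.qr(rng.standard_normal((50, 3)))
s = np.array([10.0, 1.0, 1e-4])             # one tiny singular value
V, _ = np.linalg.qr(rng.standard_normal((3, 3)))
X = U @ np.diag(s) @ V.T                    # ill-conditioned design matrix
print(np.linalg.cond(X))                    # = s_max / s_min = 1e5

y = X @ np.array([1.0, 1.0, 1.0]) + 0.01 * rng.standard_normal(50)

# ridge regression for several regularization weights
for lam in [0.0, 1e-6, 1e-3, 1e-1]:
    theta = np.linalg.solve(X.T @ X + lam * np.eye(3), X.T @ y)
    residual = np.linalg.norm(X @ theta - y)
    print(lam, residual, np.linalg.norm(theta))  # residual vs. norm -> L-curve
\end{verbatim}
\end{frame}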
2235 | 2243 |
|
@@ -2474,7 +2482,9 @@ \subsection{Exercise 06} |
2474 | 2482 | \begin{frame}{Ex06: Audio Toy Example for Linear Regression and SVD} |
2475 | 2483 | Objectives |
2476 | 2484 | \begin{itemize} |
2477 | | -\item TBD |
| 2485 | +\item audio multitrack data (stems) arranged as a data matrix |
| 2486 | +\item the SVD of this matrix allows us to listen to the U space, i.e. to the orthogonal audio signals (a simple source separation approach) |
| 2487 | +\item try to find the mixing gains of a mix that is corrupted by noise (NumPy sketch on the next slide) |
2478 | 2488 | \end{itemize} |
2479 | 2489 | \end{frame} |
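
\begin{frame}[fragile]{Ex06: Audio Toy Example (NumPy Sketch)}
A minimal sketch, not the exercise notebook: random noise stands in for real stems, and the gains and noise level are made-up values; only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(4)
N, n_stems = 44100, 4                      # 1 s of audio, 4 stems (toy data)
S = rng.standard_normal((N, n_stems))      # columns = individual stems

g_true = np.array([0.9, 0.5, 0.7, 0.3])    # unknown mixing gains
mix = S @ g_true + 0.05 * rng.standard_normal(N)   # noisy mono mix

# SVD of the stem matrix: columns of U are orthogonal 'audio' signals
U, s, Vh = np.linalg.svd(S, full_matrices=False)
# one could listen to U[:, k] after normalization (source separation flavour)

# least squares estimate of the mixing gains from the noisy mix
g_hat = np.linalg.lstsq(S, mix, rcond=None)[0]
print(g_true, g_hat)
\end{verbatim}
\end{frame}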
2480 | 2490 |
|
@@ -2521,16 +2531,31 @@ \subsection{Exercise 06} |
2521 | 2531 | \section{Section III: Feature Design} |
2522 | 2532 |
|
2523 | 2533 | \subsection{Exercise 07} |
2524 | | -\begin{frame}[t]{Ex07: Audio Features} |
| 2534 | +\begin{frame}{Ex07: Audio Features} |
| 2535 | +Objectives |
| 2536 | +\begin{itemize} |
| 2537 | +\item frequency-, time- and time-frequency-based features |
| 2538 | +\item histogram, PDF |
| 2539 | +\item simple technical energy- and peak-based measures |
| 2540 | +\item loudness: technical vs. perceptual, LUFS concept |
| 2541 | +\item STFT / periodogram (NumPy sketch on the next slide) |
| 2542 | +\end{itemize} |
| 2543 | + |
2525 | 2544 | no slides so far |
| 2545 | + |
2526 | 2546 | \end{frame} |
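
\begin{frame}[fragile]{Ex07: Simple Audio Features (NumPy Sketch)}
A minimal sketch with a synthetic 1 kHz test tone; sampling rate, frame length and window are made-up choices and only NumPy is assumed. Note that true LUFS loudness additionally needs K-weighting and gating, which is omitted here.
\footnotesize
\begin{verbatim}
import numpy as np

fs = 48000                                      # sampling rate in Hz
t = np.arange(fs) / fs
x = 0.5 * np.sin(2 * np.pi * 1000 * t)          # 1 kHz test tone, 1 s

# simple technical measures
peak = np.max(np.abs(x))
rms = np.sqrt(np.mean(x**2))
print(peak, rms, 20 * np.log10(rms))            # dB RMS, not LUFS

# amplitude histogram as a raw PDF estimate
hist, edges = np.histogram(x, bins=50, density=True)

# periodogram via the FFT (one frame of a hand-rolled STFT)
win = np.hanning(2048)
X = np.fft.rfft(x[:2048] * win)
psd = np.abs(X)**2 / (fs * np.sum(win**2))
freqs = np.fft.rfftfreq(2048, d=1/fs)
print(freqs[np.argmax(psd)])                    # peak near 1 kHz
\end{verbatim}
\end{frame}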
2527 | 2547 |
|
| 2548 | + |
| 2549 | + |
| 2550 | + |
2528 | 2551 | \subsection{Exercise 08} |
2529 | 2552 |
|
2530 | 2553 | \begin{frame}{Ex08: Principal Component Analysis (PCA)} |
2531 | 2554 | Objectives |
2532 | 2555 | \begin{itemize} |
2533 | | -\item TBD |
| 2556 | +\item insights into a mean-free data matrix |
| 2557 | +\item low-rank approximation / linear dimensionality reduction as pre-processing steps for feature design |
| 2558 | +\item PCA creates orthogonal features that are sorted by their importance (w.r.t. variance); see the NumPy sketch on the next slide |
2534 | 2559 | \end{itemize} |
2535 | 2560 | \end{frame} |
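
\begin{frame}[fragile]{Ex08: PCA via the SVD (NumPy Sketch)}
A minimal sketch, not the exercise code: the correlated toy data and seed are made up and only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(5)
# 500 samples, 3 correlated features
Z = rng.standard_normal((500, 2)) @ np.array([[2.0, 0.5, 0.0],
                                              [0.0, 1.0, 0.3]])
X = Z - Z.mean(axis=0)                     # mean-free data matrix

U, s, Vh = np.linalg.svd(X, full_matrices=False)
pc_scores = X @ Vh.T                       # orthogonal PCA features
explained_var = s**2 / (X.shape[0] - 1)    # sorted by importance
print(explained_var / explained_var.sum())

# rank-1 / low-rank approximation as dimensionality reduction
X_rank1 = s[0] * np.outer(U[:, 0], Vh[0, :])
print(np.linalg.norm(X - X_rank1) / np.linalg.norm(X))
\end{verbatim}
\end{frame}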
2536 | 2561 |
|
@@ -2913,13 +2938,33 @@ \subsection{Exercise 08} |
2913 | 2938 | \section{Section IV: Train Models} |
2914 | 2939 |
|
2915 | 2940 | \subsection{Exercise 09} |
2916 | | -\begin{frame}[t]{Ex 09: Bias-Variance Trade Off} |
| 2941 | +\begin{frame}{Ex 09: Bias-Variance Trade Off} |
| 2942 | + |
| 2943 | +Objectives |
| 2944 | +\begin{itemize} |
| 2945 | +\item concept of the expected squared error split into model bias$^2$ + model variance + data noise variance |
| 2946 | +\item underfitting / overfitting as extreme cases of inappropriate model architectures |
| 2947 | +\item example with a Fourier series, i.e. trigonometric polynomial regression (NumPy sketch on the next slide) |
| 2948 | +\end{itemize} |
| 2949 | + |
2917 | 2950 | no slides so far |
| 2951 | + |
| 2952 | + |
2918 | 2953 | \end{frame} |
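
\begin{frame}[fragile]{Ex09: Bias-Variance Toy Experiment (NumPy Sketch)}
A minimal sketch, not the exercise code: it uses plain polynomial regression instead of a Fourier series, and the target function, noise level and polynomial degrees are made-up values; only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(6)
x = np.linspace(0, 1, 50)
f_true = np.sin(2 * np.pi * x)                   # ground truth
sigma = 0.3                                      # data noise std

def fit_poly(deg, runs=500):
    preds = np.empty((runs, x.size))
    for r in range(runs):
        y = f_true + sigma * rng.standard_normal(x.size)
        preds[r] = np.polyval(np.polyfit(x, y, deg), x)
    bias2 = np.mean((preds.mean(axis=0) - f_true)**2)
    var = np.mean(preds.var(axis=0))
    return bias2, var

for deg in [1, 3, 9]:                            # under-, good, over-fit
    print(deg, fit_poly(deg))                    # bias^2 vs. variance
\end{verbatim}
\end{frame}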
2919 | 2954 |
|
2920 | 2955 | \subsection{Exercise 10} |
2921 | | -\begin{frame}[t]{Ex 10: Gradient Descent} |
| 2956 | +\begin{frame}{Ex 10: Gradient Descent} |
| 2957 | + |
| 2958 | +Objectives |
| 2959 | +\begin{itemize} |
| 2960 | +\item least squares: closed form vs. numerical solution via gradient descent (i.e. a first-order approach) |
| 2961 | +\item local vs. global minima, saddle points |
| 2962 | +\item crucial parameter settings: learning rate, number of iterations and initial values |
| 2963 | +\item improvements over plain GD: momentum (NumPy sketch on the next slide) |
| 2964 | +\end{itemize} |
| 2965 | + |
2922 | 2966 | no slides so far |
| 2967 | + |
2923 | 2968 | \end{frame} |
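
\begin{frame}[fragile]{Ex10: Gradient Descent with Momentum (NumPy Sketch)}
A minimal sketch, not the exercise code: data, learning rate, momentum factor, iteration count and init values are made-up settings; only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(7)
X = rng.standard_normal((200, 3))
y = X @ np.array([2.0, -1.0, 0.5]) + 0.1 * rng.standard_normal(200)

theta_closed = np.linalg.lstsq(X, y, rcond=None)[0]   # closed-form LS

# gradient descent with momentum on the cost 1/N ||X theta - y||^2
theta = np.zeros(3)                                   # init values
v = np.zeros(3)
eta, beta, n_iter = 0.05, 0.9, 500                    # crucial settings
for _ in range(n_iter):
    grad = 2 / len(y) * X.T @ (X @ theta - y)
    v = beta * v + grad
    theta = theta - eta * v
print(theta_closed, theta)
\end{verbatim}
\end{frame}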
2924 | 2969 |
|
2925 | 2970 |
|
@@ -3080,7 +3125,10 @@ \subsection{Exercise 11} |
3080 | 3125 | \begin{frame}{Ex11: Non-Linear Model Introduction} |
3081 | 3126 | Objectives |
3082 | 3127 | \begin{itemize} |
3083 | | -\item TBD |
| 3128 | +\item XOR is a classification problem that cannot be handled by a purely linear model |
| 3129 | +\item introduce two new ingredients: a bias term and a non-linear activation function |
| 3130 | +\item perceptron concept |
| 3131 | +\item general architecture of non-linear models (NumPy sketch on the next slide) |
3084 | 3132 | \end{itemize} |
3085 | 3133 | \end{frame} |
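
\begin{frame}[fragile]{Ex11: XOR Needs Bias and Non-Linearity (NumPy Sketch)}
A minimal sketch, not the exercise code: the 2-2-1 network weights are hand-crafted (not trained) to show that bias terms plus a non-linear activation solve XOR, while a purely linear least squares fit cannot; only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])      # XOR inputs
y = np.array([0, 1, 1, 0])

# best purely linear (least squares) fit cannot reproduce XOR
w_lin = np.linalg.lstsq(X, y, rcond=None)[0]
print(np.round(X @ w_lin, 2))         # ~[0, .33, .33, .67] -> useless

# hand-crafted 2-2-1 network: weights + bias + step nonlinearity
step = lambda z: (z > 0).astype(float)
W1 = np.array([[1.0, 1.0], [1.0, 1.0]])
b1 = np.array([-0.5, -1.5])           # bias shifts the thresholds
W2 = np.array([1.0, -1.0])
b2 = -0.5
h = step(X @ W1 + b1)                 # hidden layer (two perceptrons)
print(step(h @ W2 + b2))              # [0, 1, 1, 0] -> XOR solved
\end{verbatim}
\end{frame}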
3086 | 3134 |
|
@@ -3484,7 +3532,8 @@ \subsection{Exercise 12} |
3484 | 3532 | \begin{frame}{Ex12: Binary Classification} |
3485 | 3533 | Objectives |
3486 | 3534 | \begin{itemize} |
3487 | | -\item TBD |
| 3535 | +\item binary classifier as the simplest non-linear model |
| 3536 | +\item check the ingredients of that model: architecture, output activation function, an appropriate loss function, forward and back propagation, gradient descent (NumPy sketch on the next slide) |
3488 | 3537 | \end{itemize} |
3489 | 3538 | \end{frame} |
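
\begin{frame}[fragile]{Ex12: Binary Classifier Trained with Gradient Descent (NumPy Sketch)}
A minimal sketch, not the exercise code: the toy Gaussian blobs, learning rate and iteration count are made-up values; only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(8)
# two Gaussian blobs as a toy binary problem
X = np.vstack([rng.normal(-1, 1, (100, 2)), rng.normal(+1, 1, (100, 2))])
y = np.concatenate([np.zeros(100), np.ones(100)])

sigmoid = lambda z: 1 / (1 + np.exp(-z))
w, b, eta = np.zeros(2), 0.0, 0.1

for _ in range(2000):                    # gradient descent
    y_hat = sigmoid(X @ w + b)           # forward pass, sigmoid output
    err = y_hat - y                      # BCE gradient (backprop in one line)
    w -= eta * X.T @ err / len(y)
    b -= eta * err.mean()

acc = np.mean((sigmoid(X @ w + b) > 0.5) == y)
print(acc)
\end{verbatim}
\end{frame}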
3490 | 3539 |
|
@@ -3918,7 +3967,8 @@ \subsection{Exercise 13} |
3918 | 3967 | \begin{frame}{Ex13: Binary Classification with Hidden Layer Model / Multivariate Chain Rule / Metrics} |
3919 | 3968 | Objectives |
3920 | 3969 | \begin{itemize} |
3921 | | -\item TBD |
| 3970 | +\item a binary classification model with hidden layers means more complexity and more backpropagation effort |
| 3971 | +\item metrics for binary classification: the 2x2 confusion matrix (NumPy sketch on the next slide) |
3922 | 3972 | \end{itemize} |
3923 | 3973 | \end{frame} |
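
\begin{frame}[fragile]{Ex13: Hidden Layer Backprop and Confusion Matrix (NumPy Sketch)}
A minimal sketch, not the exercise code: toy data, layer sizes, learning rate and iteration count are made-up values; only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(9)
X = np.vstack([rng.normal(-1, 1, (100, 2)), rng.normal(+1, 1, (100, 2))])
y = np.concatenate([np.zeros(100), np.ones(100)])[:, None]

sigmoid = lambda z: 1 / (1 + np.exp(-z))
W1, b1 = 0.5 * rng.standard_normal((2, 4)), np.zeros(4)   # hidden layer
W2, b2 = 0.5 * rng.standard_normal((4, 1)), np.zeros(1)   # output layer
eta = 1.0

for _ in range(5000):
    H = sigmoid(X @ W1 + b1)                 # forward
    y_hat = sigmoid(H @ W2 + b2)
    dZ2 = (y_hat - y) / len(y)               # backprop (BCE + sigmoid)
    dZ1 = (dZ2 @ W2.T) * H * (1 - H)         # multivariate chain rule
    W2 -= eta * H.T @ dZ2;  b2 -= eta * dZ2.sum(axis=0)
    W1 -= eta * X.T @ dZ1;  b1 -= eta * dZ1.sum(axis=0)

pred, true = (y_hat > 0.5).astype(int).ravel(), y.ravel().astype(int)
TP = np.sum((pred == 1) & (true == 1)); TN = np.sum((pred == 0) & (true == 0))
FP = np.sum((pred == 1) & (true == 0)); FN = np.sum((pred == 0) & (true == 1))
print(np.array([[TN, FP], [FN, TP]]))        # 2x2 confusion matrix
\end{verbatim}
\end{frame}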
3924 | 3974 |
|
@@ -4290,7 +4340,13 @@ \subsection{Exercise 14} |
4290 | 4340 | \begin{frame}{Ex14: Multi-Class Classification with Softmax Output Layer} |
4291 | 4341 | Objectives |
4292 | 4342 | \begin{itemize} |
4293 | | -\item TBD |
| 4343 | +\item a typical (often used) multi-class classification model with hidden layers |
| 4344 | +\item check the ingredients of that model: architecture, output activation function, an appropriate loss function, forward and back propagation |
| 4345 | +\item metrics: confusion matrix with different normalizations, accuracy as a single-number metric |
| 4346 | +\item final toy example: simple music genre classification with 3 mutually exclusive classes |
| 4347 | +\item data preparation and feature design |
| 4348 | +\item hyperparameter tuning for DNN models |
| 4349 | +\item final training and evaluation (NumPy sketch on the next slide) |
4294 | 4350 | \end{itemize} |
4295 | 4351 | \end{frame} |
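
\begin{frame}[fragile]{Ex14: Softmax Multi-Class Toy Example (NumPy Sketch)}
A minimal sketch, not the exercise code: to stay short it uses a softmax output layer without hidden layers, and the `genre' features, class means, learning rate and iteration count are made-up toy values; only NumPy is assumed.
\footnotesize
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(10)
# toy 'genre' data: 3 classes, 2 features
X = np.vstack([rng.normal(m, 0.7, (80, 2)) for m in ([-2, 0], [0, 2], [2, 0])])
y = np.repeat([0, 1, 2], 80)
Y = np.eye(3)[y]                                  # one-hot targets

def softmax(Z):
    E = np.exp(Z - Z.max(axis=1, keepdims=True))  # numerically stable
    return E / E.sum(axis=1, keepdims=True)

W, b, eta = np.zeros((2, 3)), np.zeros(3), 0.5
for _ in range(1000):
    P = softmax(X @ W + b)                        # forward
    dZ = (P - Y) / len(y)                         # softmax + cross-entropy grad
    W -= eta * X.T @ dZ
    b -= eta * dZ.sum(axis=0)

pred = P.argmax(axis=1)
C = np.zeros((3, 3), dtype=int)
for t, p in zip(y, pred):
    C[t, p] += 1                                  # rows: true, cols: predicted
print(C)                                          # confusion matrix
print(np.trace(C) / C.sum())                      # accuracy
\end{verbatim}
\end{frame}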
4296 | 4352 |
|
|