diff --git a/.archive.mk b/.archive.mk index 4887b87..27ce859 100755 --- a/.archive.mk +++ b/.archive.mk @@ -6,16 +6,16 @@ # Changelog: # * Nov 2022: The archive is extracted again, then slides.pdf is removed if a patched slides-sc22.pdf is found (which includes an SC22 slide 0 title slide); and then repackaged .PHONY: all -all: tut102-multi-gpu.tar.gz +all: tut140-multi-gpu.tar.gz -SOURCES=$(shell gfind . -maxdepth 1 -mindepth 1 -not -path "./.*" -not -name "tut102-multi-gpu.tar.gz" -printf '%P\n' | sort -h) +SOURCES=$(shell gfind . -maxdepth 1 -mindepth 1 -not -path "./.*" -not -name "tut140-multi-gpu.tar.gz" -printf '%P\n' | sort -h) -tut102-multi-gpu.tar.gz: $(shell find . -not -name "tut102-multi-gpu.tar.gz") +tut140-multi-gpu.tar.gz: $(shell find . -not -name "tut140-multi-gpu.tar.gz") sed -i '1 i***Please check GitHub repo for latest version of slides: https://github.com/FZJ-JSC/tutorial-multi-gpu/ ***\n' README.md - tar czf $@ --transform 's,^,SC22-tut102-Multi-GPU/,' --exclude=".*" $(SOURCES) + tar czf $@ --transform 's,^,SC23-tut140-Multi-GPU/,' --exclude=".*" $(SOURCES) tar xf $@ rm $@ - find SC22-tut102-Multi-GPU/ -not -path './.*' -iname 'slides-*.pdf' -execdir rm slides.pdf \; - tar czf $@ SC22-tut102-Multi-GPU - rm -rf SC22-tut102-Multi-GPU + find SC23-tut140-Multi-GPU/ -not -path './.*' -iname 'slides-*.pdf' -execdir rm slides.pdf \; + tar czf $@ SC23-tut140-Multi-GPU + rm -rf SC23-tut140-Multi-GPU sed -i '1,2d' README.md \ No newline at end of file diff --git a/.etc/.set-facl-permissions.sh b/.etc/.set-facl-permissions.sh index 33b48bc..b316957 100644 --- a/.etc/.set-facl-permissions.sh +++ b/.etc/.set-facl-permissions.sh @@ -3,8 +3,8 @@ set -x for user in haghighimood1 kraus1 hrywniak1 oden1 garciadegonzalo1 badwaik1 john2; do - setfacl -m u:$user:rwx -R $PROJECT_training2313/common/ - setfacl -m u:$user:rwx -R $PROJECT_training2313/env.sh + setfacl -m u:$user:rwx -R $PROJECT_training2332/common/ + setfacl -m u:$user:rwx -R $PROJECT_training2332/env.sh done set +x \ No newline at end of file diff --git a/.etc/deploy-material.sh b/.etc/deploy-material.sh index 81cf58d..5c3ef1e 100755 --- a/.etc/deploy-material.sh +++ b/.etc/deploy-material.sh @@ -1 +1 @@ -rsync --archive --exclude="*minified.pdf" --exclude="tut*" --exclude=".*" --verbose ../ judac:/p/project/training2313/common/material/ +rsync --archive --exclude="*minified.pdf" --exclude="tut*" --exclude=".*" --exclude="*-sc*.pdf" --verbose ../ judac:/p/project/training2332/common/material/ diff --git a/.etc/deploy.sh b/.etc/deploy.sh index 363675f..49dcb68 100755 --- a/.etc/deploy.sh +++ b/.etc/deploy.sh @@ -1 +1 @@ -rsync --archive --exclude="deploy.sh" --verbose . judac:/p/project/training2313/common/environment/ +rsync --archive --exclude="deploy.sh" --exclude="raw/" --exclude="sc23-titleslides/" --verbose . 
judac:/p/project/training2332/common/environment/ diff --git a/.etc/instructions-header.md b/.etc/instructions-header.md index 4267766..b73da92 100644 --- a/.etc/instructions-header.md +++ b/.etc/instructions-header.md @@ -1,7 +1,6 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 diff --git a/.etc/jsccourse-bashrc.sh b/.etc/jsccourse-bashrc.sh index 57ae415..be3571c 100644 --- a/.etc/jsccourse-bashrc.sh +++ b/.etc/jsccourse-bashrc.sh @@ -11,10 +11,10 @@ # Andreas Herten, >2017 ################################################ if [ -z "$_JSCCOURSE_ENV_SOURCED" ]; then - project="training2313" + project="training2332" export JSCCOURSE_DIR_GROUP=/p/project/$project - export JSCCOURSE_DIR_LOCAL=${JSCCOURSE_DIR_LOCAL_BASE:-$HOME}/ISC23-Multi-GPU-Tutorial + export JSCCOURSE_DIR_LOCAL=${JSCCOURSE_DIR_LOCAL_BASE:-$HOME}/SC23-Multi-GPU-Tutorial export _JSCCOURSE_ENV_SOURCED="$(date)" export C_V_D="0,1,2,3" @@ -23,8 +23,8 @@ if [ -z "$_JSCCOURSE_ENV_SOURCED" ]; then res="" currentday=$(date +%d) - if [[ "$currentday" == "21" ]]; then - res="--reservation isc23-tutorial-2023-05-21" + if [[ "$currentday" == "13" ]]; then + res="--reservation sc-tutorial" fi export SLURM_NTASKS=1 @@ -56,9 +56,10 @@ if [ -z "$_JSCCOURSE_ENV_SOURCED" ]; then ;; esac - export JSC_BATCH_CONFIG="$res --partition ${partition} --cpu-bind=sockets --gres=gpu:$ngpus $JSC_SUBMIT_CMD_SYSTEM_SPECIFIC_OPTIONS --time 0:10:00" + export JSC_BATCH_CONFIG="$res --partition ${partition} --disable-dcgm --gres=gpu:$ngpus $JSC_SUBMIT_CMD_SYSTEM_SPECIFIC_OPTIONS --time 0:10:00" export JSC_ALLOC_CMD="salloc $JSC_BATCH_CONFIG" - export JSC_SUBMIT_CMD="srun $JSC_BATCH_CONFIG --pty" + # export JSC_SUBMIT_CMD="srun $JSC_BATCH_CONFIG --pty" + export JSC_SUBMIT_CMD="salloc $JSC_BATCH_CONFIG srun --cpu-bind=sockets --pty" export _JSC_MATERIAL_SYNC="rsync --archive --update --exclude='.*' --exclude='.*/' $JSCCOURSE_DIR_GROUP/common/material/ $JSCCOURSE_DIR_LOCAL" export _JSC_MATERIAL_SYNC_FORCE="rsync --archive --exclude='.*' --exclude='.*/' $JSCCOURSE_DIR_GROUP/common/material/ $JSCCOURSE_DIR_LOCAL" @@ -108,7 +109,7 @@ if [[ $- =~ "i" ]]; then echo "" echo "*******************************************************************************" - echo " Welcome to the ISC23 Tutorial on Multi-GPU Computing for Exascale! " + echo " Welcome to the SC23 Tutorial on Multi-GPU Computing for Exascale! " # echo " A default call to get a batch system allocation is stored in \$JSC_ALLOC_CMD!" # echo " Use it with \`eval \$JSC_ALLOC_CMD\`. 
The value of \$JSC_ALLOC_CMD is:" # echo -n " " diff --git a/.etc/modules.sh b/.etc/modules.sh index d17a67a..12f3e35 100644 --- a/.etc/modules.sh +++ b/.etc/modules.sh @@ -2,9 +2,9 @@ module purge module load Stages/2023 module load GCC/11.3.0 module load CUDA/11.7 -module load ParaStationMPI/5.7.0-1 +module load ParaStationMPI/5.7.1-1 module load NCCL/default-CUDA-11.7 -module load NVSHMEM/2.5.0 +module load NVSHMEM/2.8.0-CUDA-11.7 module load Nsight-Systems/2023.2.1 module load MPI-settings/CUDA # module use $JSCCOURSE_DIR_GROUP/common/modulefiles \ No newline at end of file diff --git a/.etc/printout.tex b/.etc/printout.tex index f4eb68a..5c5d62c 100644 --- a/.etc/printout.tex +++ b/.etc/printout.tex @@ -52,12 +52,12 @@ \vspace*{1\baselineskip} \begin{enumerate} - \item Create \highlight{\emph{JuDoor}} account, \highlight{join} \texttt{training2313} project: \href{https://go.fzj.de/mg-jd}{\texttt{go.fzj.de/mg-jd}} + \item Create \highlight{\emph{JuDoor}} account, \highlight{join} \texttt{training2332} project: \href{https://go.fzj.de/mg-jd}{\texttt{go.fzj.de/mg-jd}} \item Fill \highlight{usage agreement}; wait at least 15 min for synchronization \item Login to \highlight{Jupyter}: \href{https://jupyter-jsc.fz-juelich.de}{\texttt{jupyter-jsc.fz-juelich.de}} (JuDoor credentials) \item Create new Jupyter \highlight{instance} on \texttt{LoginNode\textbf{Booster}} with training project \item Start Jupyter Terminal, \highlight{source} course environment\\ - \verb|source $PROJECT_training2313/env.sh| + \verb|source $PROJECT_training2332/env.sh| \item \highlight{Sync} course material: \verb|jsc-material-sync| \end{enumerate} \end{document} \ No newline at end of file diff --git a/.etc/sc22-titleslides/prelude_slides.mk b/.etc/sc22-titleslides/prelude_slides.mk deleted file mode 100755 index 4554893..0000000 --- a/.etc/sc22-titleslides/prelude_slides.mk +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/make -f -# LAUNCH FROM THIS FOLDER - -OUTPUT=../../01-L_Introduction_Overview/slides-sc22.pdf ../../01b-H_Onboarding/slides-sc22.pdf ../../04-L_Performance_and_debugging_tools/slides-sc22.pdf ../../11-L_Summary_Advanced/slides-sc22.pdf -# OUTPUT=../../01-L_Introduction_Overview/slides-sc22.pdf ../../01b-H_Onboarding/slides-sc22.pdf ../../04-L_Performance_and_debugging_tools/slides-sc22.pdf ../../05-L_Optimization_techniques_for_multi-GPU_applications/slides-sc22.pdf ../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides-sc22.pdf ../../11-L_Summary_Advanced/slides-sc22.pdf - -.PHONY: all -all: $(OUTPUT) -MYTMPDIR:=$(shell mktemp -d) - -title-slide.in.tex: title-slide.tex - cat $< | \ - sed 's#INSERT TITLE HERE#((( title )))#' | \ - sed 's#Insert Author Here#((( author )))#' > \ - $@ - -title-slide.01.tex ../../01-L_Introduction_Overview/slides-sc22.pdf: SESSIONKEY=01 -title-slide.01b.tex ../../01b-H_Onboarding/slides-sc22.pdf: SESSIONKEY=01b -title-slide.04.tex ../../04-L_Performance_and_debugging_tools/slides-sc22.pdf: SESSIONKEY=04 -title-slide.05.tex ../../05-L_Optimization_techniques_for_multi-GPU_applications/slides-sc22.pdf: SESSIONKEY=05 -title-slide.09.tex ../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides-sc22.pdf: SESSIONKEY=09 -title-slide.11.tex ../../11-L_Summary_Advanced/slides-sc22.pdf: SESSIONKEY=11 -title-slide.01.tex title-slide.01b.tex title-slide.04.tex title-slide.05.tex title-slide.09.tex title-slide.11.tex: title-slide.tex - python3 gen-titleslide.py --author "$(shell cat sessions.yml | yq .$(SESSIONKEY).author)" --title "$(shell 
cat sessions.yml | yq .$(SESSIONKEY).title)" --out "$@" - -../../01-L_Introduction_Overview/slides-sc22.pdf: BASEDECK=../../01-L_Introduction_Overview/slides.pdf -../../01b-H_Onboarding/slides-sc22.pdf: BASEDECK=../../01b-H_Onboarding/slides.pdf -../../04-L_Performance_and_debugging_tools/slides-sc22.pdf: BASEDECK=../../04-L_Performance_and_debugging_tools/slides.pdf -../../05-L_Optimization_techniques_for_multi-GPU_applications/slides-sc22.pdf: BASEDECK=../../05-L_Optimization_techniques_for_multi-GPU_applications/slides.pdf -../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides-sc22.pdf: BASEDECK=../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides.pdf -../../11-L_Summary_Advanced/slides-sc22.pdf: BASEDECK=../../11-L_Summary_Advanced/slides.pdf - -.SECONDEXPANSION: -%-sc22.pdf: %.pdf title-slide.$$(SESSIONKEY).tex $(BASEDECK) - latexmk -output-directory=$(MYTMPDIR) -jobname=${SESSIONKEY} -pdfxe title-slide.$(SESSIONKEY).tex - cp $(MYTMPDIR)/${SESSIONKEY}.pdf title-slide.$(SESSIONKEY).pdf - mutool merge -o $@ title-slide.$(SESSIONKEY).pdf 0 $(BASEDECK) \ No newline at end of file diff --git a/.etc/sc22-titleslides/sc-background.png b/.etc/sc22-titleslides/sc-background.png deleted file mode 100644 index 5d331f3..0000000 Binary files a/.etc/sc22-titleslides/sc-background.png and /dev/null differ diff --git a/.etc/sc22-titleslides/.gitignore b/.etc/sc23-titleslides/.gitignore similarity index 100% rename from .etc/sc22-titleslides/.gitignore rename to .etc/sc23-titleslides/.gitignore diff --git a/.etc/sc22-titleslides/README.md b/.etc/sc23-titleslides/README.md similarity index 100% rename from .etc/sc22-titleslides/README.md rename to .etc/sc23-titleslides/README.md diff --git a/.etc/sc22-titleslides/gen-titleslide.py b/.etc/sc23-titleslides/gen-titleslide.py similarity index 100% rename from .etc/sc22-titleslides/gen-titleslide.py rename to .etc/sc23-titleslides/gen-titleslide.py diff --git a/.etc/sc23-titleslides/prelude_slides.mk b/.etc/sc23-titleslides/prelude_slides.mk new file mode 100755 index 0000000..2d0d3a6 --- /dev/null +++ b/.etc/sc23-titleslides/prelude_slides.mk @@ -0,0 +1,44 @@ +#!/usr/bin/make -f +# LAUNCH FROM THIS FOLDER + +# OUTPUT=../../01-L_Introduction_Overview/slides-sc23.pdf ../../01b-H_Onboarding/slides-sc23.pdf ../../04-L_Performance_and_debugging_tools/slides-sc23.pdf ../../11-L_Summary_Advanced/slides-sc23.pdf +OUTPUT=../../01-L_Introduction_Overview/slides-sc23.pdf ../../01b-H_Onboarding/slides-sc23.pdf ../../02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides-sc23.pdf ../../04-L_Performance_and_debugging_tools/slides-sc23.pdf ../../05-L_Optimization_techniques_for_multi-GPU_applications/slides-sc23.pdf ../../07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides-sc23.pdf ../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides-sc23.pdf ../../11-L_Summary_Advanced/slides-sc23.pdf + +.PHONY: all +all: $(OUTPUT) +MYTMPDIR:=$(shell mktemp -d) + +title-slide.in.tex: title-slide.tex + cat $< | \ + sed 's#INSERT TITLE HERE#((( title )))#' | \ + sed 's#Insert Author Here#((( author )))#' > \ + $@ + +title-slide.01.tex ../../01-L_Introduction_Overview/slides-sc23.pdf: SESSIONKEY=01 +title-slide.01b.tex ../../01b-H_Onboarding/slides-sc23.pdf: SESSIONKEY=01b +title-slide.02.tex ../../02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides-sc23.pdf: SESSIONKEY=02 +title-slide.04.tex ../../04-L_Performance_and_debugging_tools/slides-sc23.pdf: SESSIONKEY=04 +title-slide.05.tex 
../../05-L_Optimization_techniques_for_multi-GPU_applications/slides-sc23.pdf: SESSIONKEY=05 +title-slide.07.tex ../../07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides-sc23.pdf: SESSIONKEY=07 +title-slide.09.tex ../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides-sc23.pdf: SESSIONKEY=09 +title-slide.11.tex ../../11-L_Summary_Advanced/slides-sc23.pdf: SESSIONKEY=11 +title-slide.01.tex title-slide.01b.tex title-slide.02.tex title-slide.04.tex title-slide.05.tex title-slide.07.tex title-slide.09.tex title-slide.11.tex: title-slide.tex + python3 gen-titleslide.py --author "$(shell cat sessions.yml | yq .$(SESSIONKEY).author)" --title "$(shell cat sessions.yml | yq .$(SESSIONKEY).title)" --out "$@" + +../../01-L_Introduction_Overview/slides-sc23.pdf: BASEDECK=../../01-L_Introduction_Overview/slides.pdf +../../01b-H_Onboarding/slides-sc23.pdf: BASEDECK=../../01b-H_Onboarding/slides.pdf +../../02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides-sc23.pdf: BASEDECK=../../02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides.pdf +../../04-L_Performance_and_debugging_tools/slides-sc23.pdf: BASEDECK=../../04-L_Performance_and_debugging_tools/slides.pdf +../../05-L_Optimization_techniques_for_multi-GPU_applications/slides-sc23.pdf: BASEDECK=../../05-L_Optimization_techniques_for_multi-GPU_applications/slides.pdf +../../07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides-sc23.pdf: BASEDECK=../../07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides.pdf +../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides-sc23.pdf: BASEDECK=../../09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides.pdf +../../11-L_Summary_Advanced/slides-sc23.pdf: BASEDECK=../../11-L_Summary_Advanced/slides.pdf + +.SECONDEXPANSION: +%-sc23.pdf: %.pdf title-slide.$$(SESSIONKEY).tex $(BASEDECK) + latexmk -output-directory=$(MYTMPDIR) -jobname=${SESSIONKEY} -pdfxe title-slide.$(SESSIONKEY).tex + cp $(MYTMPDIR)/${SESSIONKEY}.pdf title-slide.$(SESSIONKEY).pdf + papersize=$(shell pdfinfo title-slide.$(SESSIONKEY).pdf | awk '/^Page size:/ {printf "{%fbp,%fbp}", $$3, $$5}') && \ + pdfjam --outfile tmp.pdf --papersize "$$papersize" $(BASEDECK) + mutool merge -o $@ title-slide.$(SESSIONKEY).pdf 0 tmp.pdf + rm tmp.pdf \ No newline at end of file diff --git a/.etc/sc23-titleslides/sc-background.png b/.etc/sc23-titleslides/sc-background.png new file mode 100644 index 0000000..d5c8d61 Binary files /dev/null and b/.etc/sc23-titleslides/sc-background.png differ diff --git a/.etc/sc22-titleslides/sessions.yml b/.etc/sc23-titleslides/sessions.yml similarity index 74% rename from .etc/sc22-titleslides/sessions.yml rename to .etc/sc23-titleslides/sessions.yml index d70d41d..e36192e 100644 --- a/.etc/sc22-titleslides/sessions.yml +++ b/.etc/sc23-titleslides/sessions.yml @@ -4,11 +4,17 @@ 01b: title: 'Onboarding JUWELS Booster' author: 'Andreas Herten, JSC' +02: + title: 'Introduction to MPI-Distributed Computing with GPUs' + author: 'Simon Garcia, SNL' 04: title: 'Peformance and Debugging Tools' author: 'Markus Hrywniak, NVIDIA' 05: title: 'Optimization Techniques for Multi-GPU Applications' + author: 'Simon Garcia, SNL' +07: + title: 'NCCL and Host-Initiated NVSHMEM' author: 'Jiri Kraus, NVIDIA' 09: title: 'CUDA Graphs and Device-initiated Communication with NVSHMEM' diff --git a/.etc/sc23-titleslides/title-slide.pdf b/.etc/sc23-titleslides/title-slide.pdf new file mode 100644 index 0000000..abc39f9 Binary files /dev/null and 
b/.etc/sc23-titleslides/title-slide.pdf differ diff --git a/.etc/sc22-titleslides/title-slide.tex b/.etc/sc23-titleslides/title-slide.tex similarity index 55% rename from .etc/sc22-titleslides/title-slide.tex rename to .etc/sc23-titleslides/title-slide.tex index 552391a..23bb847 100644 --- a/.etc/sc22-titleslides/title-slide.tex +++ b/.etc/sc23-titleslides/title-slide.tex @@ -8,14 +8,16 @@ \setbeamertemplate{navigation symbols}{} \setbeamertemplate{background}{\includegraphics[width=\paperwidth,height=\paperheight]{sc-background.png}} -\setbeamercolor{normal text}{fg=white} +\setbeamercolor{normal text}{fg=black} \begin{document} \newcommand{\mytitle}{INSERT TITLE HERE} \newcommand{\myauthor}{Insert Author Here} \begin{frame}[plain, t] -\vspace*{0.5\paperheight}\LARGE -\textbf{\mytitle}\\\large -\myauthor + \begin{tikzpicture}[overlay, remember picture, align=left, text width=0.85\paperwidth] + \coordinate (main anchor) at ([yshift=0.4\paperheight,xshift=0.06\paperwidth]current page.south west); + \node (title) at (main anchor) [anchor=south west, font=\LARGE\bfseries] {\mytitle}; + \node (subtitle) at ([yshift=-0.05\paperheight]main anchor) [anchor=north west, font=\large, text=white] {\myauthor}; + \end{tikzpicture} \end{frame} \end{document} diff --git a/.gitignore b/.gitignore index 3afa2cf..82bf06c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ -tut102-multi-gpu.tar.gz -*-sc22.pdftut* +tut140-multi-gpu.tar.gz +*-sc23.pdf +tut* diff --git a/01-L_Introduction_Overview/slides.pdf b/01-L_Introduction_Overview/slides.pdf index 653c54a..351f0f9 100644 Binary files a/01-L_Introduction_Overview/slides.pdf and b/01-L_Introduction_Overview/slides.pdf differ diff --git a/01b-H_Onboarding/onboarding.pdf b/01b-H_Onboarding/onboarding.pdf new file mode 100644 index 0000000..3fb3382 Binary files /dev/null and b/01b-H_Onboarding/onboarding.pdf differ diff --git a/01b-H_Onboarding/slides.pdf b/01b-H_Onboarding/slides.pdf index 7d7706e..bdb4d56 100644 Binary files a/01b-H_Onboarding/slides.pdf and b/01b-H_Onboarding/slides.pdf differ diff --git a/02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides.pdf b/02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides.pdf index dec1995..e0914bc 100644 Binary files a/02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides.pdf and b/02-L_Introduction_to_MPI-Distributed_Computing_with_GPUs/slides.pdf differ diff --git a/03-H_Multi_GPU_Parallelization/.master/Instructions.ipynb b/03-H_Multi_GPU_Parallelization/.master/Instructions.ipynb index 46b261f..29efb90 100644 --- a/03-H_Multi_GPU_Parallelization/.master/Instructions.ipynb +++ b/03-H_Multi_GPU_Parallelization/.master/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", "\n", "### Task: Parallelize Jacobi Solver for 
Multiple GPUs using CUDA-aware MPI\n", "\n", @@ -71,7 +70,7 @@ " size + 1 rows\n", "- Adapt the computation of (`iy_start_global`)" ], - "id": "c9546c58-9fec-4bb4-9d7f-1eaa1c686c7b" + "id": "b99f8bbd-dc74-4837-bc8a-aa5ebd315fe7" } ], "nbformat": 4, diff --git a/03-H_Multi_GPU_Parallelization/.master/Instructions.md b/03-H_Multi_GPU_Parallelization/.master/Instructions.md index c8992d5..1095e4f 100644 --- a/03-H_Multi_GPU_Parallelization/.master/Instructions.md +++ b/03-H_Multi_GPU_Parallelization/.master/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI ### Task: Parallelize Jacobi Solver for Multiple GPUs using CUDA-aware MPI diff --git a/03-H_Multi_GPU_Parallelization/solutions/Instructions.ipynb b/03-H_Multi_GPU_Parallelization/solutions/Instructions.ipynb index 46b261f..29efb90 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/Instructions.ipynb +++ b/03-H_Multi_GPU_Parallelization/solutions/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", "\n", "### Task: Parallelize Jacobi Solver for Multiple GPUs using CUDA-aware MPI\n", "\n", @@ -71,7 +70,7 @@ " size + 1 rows\n", "- Adapt the computation of (`iy_start_global`)" ], - "id": "c9546c58-9fec-4bb4-9d7f-1eaa1c686c7b" + "id": "b99f8bbd-dc74-4837-bc8a-aa5ebd315fe7" } ], "nbformat": 4, diff --git a/03-H_Multi_GPU_Parallelization/solutions/Instructions.md b/03-H_Multi_GPU_Parallelization/solutions/Instructions.md index c8992d5..1095e4f 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/Instructions.md +++ b/03-H_Multi_GPU_Parallelization/solutions/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI ### Task: Parallelize Jacobi Solver for Multiple GPUs using 
CUDA-aware MPI diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.ipynb b/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.ipynb index 46b261f..29efb90 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.ipynb +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", "\n", "### Task: Parallelize Jacobi Solver for Multiple GPUs using CUDA-aware MPI\n", "\n", @@ -71,7 +70,7 @@ " size + 1 rows\n", "- Adapt the computation of (`iy_start_global`)" ], - "id": "c9546c58-9fec-4bb4-9d7f-1eaa1c686c7b" + "id": "b99f8bbd-dc74-4837-bc8a-aa5ebd315fe7" } ], "nbformat": 4, diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.md b/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.md index c8992d5..1095e4f 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.md +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI ### Task: Parallelize Jacobi Solver for Multiple GPUs using CUDA-aware MPI diff --git a/03-H_Multi_GPU_Parallelization/tasks/Instructions.ipynb b/03-H_Multi_GPU_Parallelization/tasks/Instructions.ipynb index 46b261f..29efb90 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/Instructions.ipynb +++ b/03-H_Multi_GPU_Parallelization/tasks/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI\n", "\n", "### Task: 
Parallelize Jacobi Solver for Multiple GPUs using CUDA-aware MPI\n", "\n", @@ -71,7 +70,7 @@ " size + 1 rows\n", "- Adapt the computation of (`iy_start_global`)" ], - "id": "c9546c58-9fec-4bb4-9d7f-1eaa1c686c7b" + "id": "b99f8bbd-dc74-4837-bc8a-aa5ebd315fe7" } ], "nbformat": 4, diff --git a/03-H_Multi_GPU_Parallelization/tasks/Instructions.md b/03-H_Multi_GPU_Parallelization/tasks/Instructions.md index c8992d5..1095e4f 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/Instructions.md +++ b/03-H_Multi_GPU_Parallelization/tasks/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 3: Multi-GPU Parallelization with CUDA-aware MPI ### Task: Parallelize Jacobi Solver for Multiple GPUs using CUDA-aware MPI diff --git a/04-L_Performance_and_debugging_tools/slides.pdf b/04-L_Performance_and_debugging_tools/slides.pdf index a854d41..155d6c9 100644 Binary files a/04-L_Performance_and_debugging_tools/slides.pdf and b/04-L_Performance_and_debugging_tools/slides.pdf differ diff --git a/05-L_Optimization_techniques_for_multi-GPU_applications/slides.pdf b/05-L_Optimization_techniques_for_multi-GPU_applications/slides.pdf index db220db..a235051 100644 Binary files a/05-L_Optimization_techniques_for_multi-GPU_applications/slides.pdf and b/05-L_Optimization_techniques_for_multi-GPU_applications/slides.pdf differ diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb index 5f851fd..f8ef908 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 6: Overlap Communication and Computation with MPI\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 6: Overlap Communication and Computation with MPI\n", "\n", "You are now going to apply the concepts you learned in the lectures 4\n", "and 5: Using profiling tools, and applying them to implement overlapping\n", @@ -85,7 +84,7 @@ "- Destroy the additional cuda streams and events before ending the\n", " application" ], - "id": "634784f1-27fa-4390-9be3-6e9c1b18e102" + "id": "02d50eab-6f3d-4b5d-8743-4032717ba48f" } ], "nbformat": 4, diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md index 12083b0..42ee407 
100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 6: Overlap Communication and Computation with MPI You are now going to apply the concepts you learned in the lectures 4 and 5: Using profiling tools, diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk b/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk index 8a96f59..b621217 100755 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/copy.mk @@ -6,7 +6,7 @@ SOLUTIONDIR = ../solutions/ IYPNB_TEMPLATE = ../../.template.json PROCESSFILES = jacobi.cpp -COPYFILES = Makefile Instructions.ipynb jacobi_kernels.cu +COPYFILES = Makefile Instructions.ipynb Instructions.md jacobi_kernels.cu TASKPROCCESFILES = $(addprefix $(TASKDIR)/,$(PROCESSFILES)) diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb index 5f851fd..f8ef908 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb +++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 6: Overlap Communication and Computation with MPI\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 6: Overlap Communication and Computation with MPI\n", "\n", "You are now going to apply the concepts you learned in the lectures 4\n", "and 5: Using profiling tools, and applying them to implement overlapping\n", @@ -85,7 +84,7 @@ "- Destroy the additional cuda streams and events before ending the\n", " application" ], - "id": "634784f1-27fa-4390-9be3-6e9c1b18e102" + "id": "02d50eab-6f3d-4b5d-8743-4032717ba48f" } ], "nbformat": 4, diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.md b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.md new file mode 100644 index 0000000..42ee407 --- /dev/null +++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Instructions.md @@ -0,0 +1,61 @@ +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale + +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA +- Program Link: + 
https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 +## Hands-On 6: Overlap Communication and Computation with MPI + +You are now going to apply the concepts you learned in the lectures 4 and 5: Using profiling tools, +and applying them to implement overlapping MPI with GPU kernels. + +Compile with + +``` {.bash} +make +``` + +Submit your compiled application to the batch system with + +``` {.bash} +make run +``` + +Study the performance by glimpsing at the profile generated with +`make profile`. For `make run` and `make profile` the environment variable `NP` can be set to change the number of processes. + +### Task 0: Profile the non-overlap MPI-CUDA version of the code + +Use the Nsight System profiler to profile the starting point version non-Overlap MPI jacobi solver. The objective is to become familiar in navigating the GUI identify possible areas to overlap computation and communication. + +1. Start by compiling and running the application with `make run` +1. Record an Nsight Systems profile, using the appropriate Makefile target (`make profile`) +1. Open the recorded profile in the GUI + - Either: Install Nsight Systems locally, and transfer the .qdrep/.nsys-rep file + - Or: By running Xpra in your browser: In Jupyter, select "File > New Launcher" and "Xpra Desktop", which will open in a new tab. Don't forget to source the environment in your `xterm`. +1. Familiarize yourself with the different rows and the traces they represent. + - See if you can correlate a CUDA API kernel launch call and the resulting kernel execution on the device +1. Follow the lecture steps and identify the relevant section with overlap potential in your code + - Hint: Try navigating with the NVTX ranges. + + +### Task 1: Implement Communication/Computation overlap + +Realize the optimization potential you discovered in the previous task and reduce the whitespace between kernel calls on the GPU profile by implementing communication/computation overlap. + +You will need to separately calculate the boundary, and you should use high-priority streams. A less efficient (problem size-dependent) alternative to high-priority streams would be to launch the boundary processing kernels before the bulk kernel. +regions for the halo exchange. + +The starting point of this task is the non-overlapping MPI variant of the Jacobi solver. 
+Follow the `TODO`s in `jacobi.cpp`: + +- Query the priority range to be used by the CUDA streams +- Create new top and bottom CUDA streams and corresponding CUDA events +- Initialize all streams using priorities +- Modify the original call to `launch_jacobi_kernel` to not compute the top and bottom regions +- Add additional calls to `launch_jacobi_kernel` for the top and bottom regions using the high-priority streams +- Wait on both top and bottom streams when calculating the norm +- Synchronize top and bottom streams before applying the periodic boundary conditions using MPI +- Destroy the additional cuda streams and events before ending the application + + diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb index 5f851fd..f8ef908 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb +++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 6: Overlap Communication and Computation with MPI\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 6: Overlap Communication and Computation with MPI\n", "\n", "You are now going to apply the concepts you learned in the lectures 4\n", "and 5: Using profiling tools, and applying them to implement overlapping\n", @@ -85,7 +84,7 @@ "- Destroy the additional cuda streams and events before ending the\n", " application" ], - "id": "634784f1-27fa-4390-9be3-6e9c1b18e102" + "id": "02d50eab-6f3d-4b5d-8743-4032717ba48f" } ], "nbformat": 4, diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.md b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.md new file mode 100644 index 0000000..42ee407 --- /dev/null +++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Instructions.md @@ -0,0 +1,61 @@ +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale + +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA +- Program Link: + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 +## Hands-On 6: Overlap Communication and Computation with MPI + +You are now going to apply the concepts you learned in the lectures 4 and 5: Using profiling tools, +and applying them to implement overlapping MPI with GPU kernels. + +Compile with + +``` {.bash} +make +``` + +Submit your compiled application to the batch system with + +``` {.bash} +make run +``` + +Study the performance by glimpsing at the profile generated with +`make profile`. For `make run` and `make profile` the environment variable `NP` can be set to change the number of processes. + +### Task 0: Profile the non-overlap MPI-CUDA version of the code + +Use the Nsight System profiler to profile the starting point version non-Overlap MPI jacobi solver. 
The objective is to become familiar in navigating the GUI identify possible areas to overlap computation and communication. + +1. Start by compiling and running the application with `make run` +1. Record an Nsight Systems profile, using the appropriate Makefile target (`make profile`) +1. Open the recorded profile in the GUI + - Either: Install Nsight Systems locally, and transfer the .qdrep/.nsys-rep file + - Or: By running Xpra in your browser: In Jupyter, select "File > New Launcher" and "Xpra Desktop", which will open in a new tab. Don't forget to source the environment in your `xterm`. +1. Familiarize yourself with the different rows and the traces they represent. + - See if you can correlate a CUDA API kernel launch call and the resulting kernel execution on the device +1. Follow the lecture steps and identify the relevant section with overlap potential in your code + - Hint: Try navigating with the NVTX ranges. + + +### Task 1: Implement Communication/Computation overlap + +Realize the optimization potential you discovered in the previous task and reduce the whitespace between kernel calls on the GPU profile by implementing communication/computation overlap. + +You will need to separately calculate the boundary, and you should use high-priority streams. A less efficient (problem size-dependent) alternative to high-priority streams would be to launch the boundary processing kernels before the bulk kernel. +regions for the halo exchange. + +The starting point of this task is the non-overlapping MPI variant of the Jacobi solver. +Follow the `TODO`s in `jacobi.cpp`: + +- Query the priority range to be used by the CUDA streams +- Create new top and bottom CUDA streams and corresponding CUDA events +- Initialize all streams using priorities +- Modify the original call to `launch_jacobi_kernel` to not compute the top and bottom regions +- Add additional calls to `launch_jacobi_kernel` for the top and bottom regions using the high-priority streams +- Wait on both top and bottom streams when calculating the norm +- Synchronize top and bottom streams before applying the periodic boundary conditions using MPI +- Destroy the additional cuda streams and events before ending the application + + diff --git a/07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides.pdf b/07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides.pdf index 9b3a058..4a5ef6e 100644 Binary files a/07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides.pdf and b/07-L_Overview_of_NCCL_and_NVSHMEM_in_MPI_Programs/slides.pdf differ diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.ipynb b/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.ipynb index f1f674b..2bd9d62 100644 --- a/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.ipynb +++ b/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 
8-NCCL: Using NCCL for Inter-GPU Communication\n", "\n", "### Task: Using NCCL\n", "\n", @@ -22,15 +21,14 @@ "variant of the jacobi solver. You need to work on `TODOs` in\n", "`jacobi.cpp`:\n", "\n", - "- Initialize NVSHMEM:\n", - " - Include NCCL headers.\n", - " - Create a NCCL unique ID, and initialize it\n", - " - Create a NCCL communicator and initilize it\n", - " - Replace the MPI_Sendrecv calls with ncclRecv and ncclSend calls\n", - " for the warmup stage\n", - " - Replace MPI for the periodic boundary conditions with NCCL\n", - " - Fix output message to indicate nccl rather than mpi\n", - " - Destroy NCCL comunicator\n", + "- Include NCCL headers.\n", + "- Create a NCCL unique ID, and initialize it\n", + "- Create a NCCL communicator and initialize it\n", + "- Replace the MPI_Sendrecv calls with ncclRecv and ncclSend calls for\n", + " the warmup stage\n", + "- Replace MPI for the periodic boundary conditions with NCCL\n", + "- Fix output message to indicate nccl rather than mpi\n", + "- Destroy NCCL communicator\n", "\n", "Compile with\n", "\n", @@ -48,7 +46,7 @@ "`make profile`. For `make run` and `make profile` the environment\n", "variable `NP` can be set to change the number of processes." ], - "id": "fcc1dec5-cae8-41fb-a41d-2f5f1e5c5509" + "id": "0179bd70-4d9b-4560-859f-31ecf7708e9e" } ], "nbformat": 4, diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.md b/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.md index e44ea2a..7368f28 100644 --- a/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.md +++ b/08-H_NCCL_NVSHMEM/.master/NCCL/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication ### Task: Using NCCL @@ -13,14 +12,13 @@ The purpose of this task is to use NCCL instead of MPI to implement a multi-GPU jacobi solver. The starting point of this task is the MPI variant of the jacobi solver. You need to work on `TODOs` in `jacobi.cpp`: -- Initialize NVSHMEM: - - Include NCCL headers. - - Create a NCCL unique ID, and initialize it - - Create a NCCL communicator and initilize it - - Replace the MPI\_Sendrecv calls with ncclRecv and ncclSend calls for the warmup stage - - Replace MPI for the periodic boundary conditions with NCCL - - Fix output message to indicate nccl rather than mpi - - Destroy NCCL comunicator +- Include NCCL headers. 
+- Create a NCCL unique ID, and initialize it +- Create a NCCL communicator and initialize it +- Replace the MPI\_Sendrecv calls with ncclRecv and ncclSend calls for the warmup stage +- Replace MPI for the periodic boundary conditions with NCCL +- Fix output message to indicate nccl rather than mpi +- Destroy NCCL communicator Compile with diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp index 33760e2..1995287 100644 --- a/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp +++ b/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp @@ -271,7 +271,7 @@ int main(int argc, char* argv[]) { real* l2_norm_h; CUDA_RT_CALL(cudaMallocHost(&l2_norm_h, sizeof(real))); -//TODO: Rename range + //TODO: Rename range #ifdef SOLUTION PUSH_RANGE("NCCL_Warmup", 5) #else @@ -326,7 +326,7 @@ int main(int argc, char* argv[]) { CUDA_RT_CALL(cudaStreamWaitEvent(push_stream, reset_l2norm_done, 0)); calculate_norm = (iter % nccheck) == 0 || (!csv && (iter % 100) == 0); - launch_jacobi_kernel(a_new, a, l2_norm_d, (iy_start + 1), (iy_end - 1), nx, calculate_norm, + launch_jacobi_kernel(a_new, a, l2_norm_d, (iy_start + 1), (iy_end - 1), nx, calculate_norm, compute_stream); launch_jacobi_kernel(a_new, a, l2_norm_d, iy_start, (iy_start + 1), nx, calculate_norm, @@ -346,7 +346,7 @@ int main(int argc, char* argv[]) { const int bottom = (rank + 1) % size; // Apply periodic boundary conditions - //TODO: Modify the lable for the RANGE, and replace MPI_Sendrecv with ncclSend and ncclRecv calls + //TODO: Modify the lable for the RANGE, and replace MPI_Sendrecv with ncclSend and ncclRecv calls // using the nccl communicator and push_stream. // Remember to use ncclGroupStart() and ncclGroupEnd() #ifdef SOLUTION @@ -358,14 +358,14 @@ int main(int argc, char* argv[]) { NCCL_CALL(ncclSend(a_new + iy_start * nx, nx, NCCL_REAL_TYPE, top, nccl_comm, push_stream)); NCCL_CALL(ncclGroupEnd()); #else - PUSH_RANGE("MPI", 5) + PUSH_RANGE("MPI", 5) MPI_CALL(MPI_Sendrecv(a_new + iy_start * nx, nx, MPI_REAL_TYPE, top, 0, a_new + (iy_end * nx), nx, MPI_REAL_TYPE, bottom, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); MPI_CALL(MPI_Sendrecv(a_new + (iy_end - 1) * nx, nx, MPI_REAL_TYPE, bottom, 0, a_new, nx, MPI_REAL_TYPE, top, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); #endif - CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); + CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); POP_RANGE if (calculate_norm) { @@ -410,13 +410,13 @@ int main(int argc, char* argv[]) { if (rank == 0 && result_correct) { if (csv) { - //TODO: Dont forget to change your output lable from mpi_overlap to nccl_overlap + //TODO: Dont forget to change your output lable from mpi_overlap to nccl_overlap #ifdef SOLUTION printf("nccl_overlap, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, #else - printf("mpi_overlap, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, + printf("mpi_overlap, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, #endif - (stop - start), runtime_serial); + (stop - start), runtime_serial); } else { printf("Num GPUs: %d.\n", size); printf( diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.ipynb b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.ipynb index b7d479f..c8d814e 100644 --- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.ipynb +++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: 
Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM\n", "\n", "### Task: Using NVSHMEM device API\n", "\n", @@ -22,18 +21,17 @@ "is the MPI variant of the jacobi solver. You need to work on `TODOs` in\n", "`jacobi.cu`:\n", "\n", - "- Initialize NVSHMEM:\n", - " - Include NVSHMEM headers.\n", - " - Initialize NVSHMEM using `MPI_COMM_WORLD`.\n", - " - Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric\n", - " heap. Take care of passing in a consistent size!\n", - " - Calculate halo/boundary row index of top and bottom neighbors.\n", - " - Add necessary inter PE synchronization.\n", - " - Replace MPI periodic boundary conditions with\n", - " `nvshmemx_float_put_on_stream` to directly push values needed by\n", - " top and bottom neighbors.\n", - " - Deallocate memory from the NVSHMEM symetric heap.\n", - " - Finalize NVSHMEM before existing the application\n", + "- Include NVSHMEM headers.\n", + "- Initialize NVSHMEM using `MPI_COMM_WORLD`.\n", + "- Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric\n", + " heap. Take care of passing in a consistent size!\n", + "- Calculate halo/boundary row index of top and bottom neighbors.\n", + "- Add necessary inter PE synchronization.\n", + "- Replace MPI periodic boundary conditions with\n", + " `nvshmemx_float_put_on_stream` to directly push values needed by top\n", + " and bottom neighbors.\n", + "- Deallocate memory from the NVSHMEM symmetric heap.\n", + "- Finalize NVSHMEM before existing the application\n", "\n", "Compile with\n", "\n", @@ -64,7 +62,7 @@ "GPUs listed. This is automatically done for the `sanitize`, `run` and\n", "`profile` make targets." ], - "id": "c38ceeb2-bbb5-416b-a9ad-dd1dad8ac0e1" + "id": "688b0b98-419c-4dd3-a60f-f1f1d3018e63" } ], "nbformat": 4, diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.md b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.md index 38a3b32..13a5729 100644 --- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.md +++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM ### Task: Using NVSHMEM device API @@ -13,15 +12,14 @@ The purpose of this task is to use the NVSHMEM host API instead of MPI to implement a multi-GPU jacobi solver. The starting point of this task is the MPI variant of the jacobi solver. You need to work on `TODOs` in `jacobi.cu`: -- Initialize NVSHMEM: - - Include NVSHMEM headers. 
- - Initialize NVSHMEM using `MPI_COMM_WORLD`. - - Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric heap. Take care of passing in a consistent size! - - Calculate halo/boundary row index of top and bottom neighbors. - - Add necessary inter PE synchronization. - - Replace MPI periodic boundary conditions with `nvshmemx_float_put_on_stream` to directly push values needed by top and bottom neighbors. - - Deallocate memory from the NVSHMEM symetric heap. - - Finalize NVSHMEM before existing the application +- Include NVSHMEM headers. +- Initialize NVSHMEM using `MPI_COMM_WORLD`. +- Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric heap. Take care of passing in a consistent size! +- Calculate halo/boundary row index of top and bottom neighbors. +- Add necessary inter PE synchronization. +- Replace MPI periodic boundary conditions with `nvshmemx_float_put_on_stream` to directly push values needed by top and bottom neighbors. +- Deallocate memory from the NVSHMEM symmetric heap. +- Finalize NVSHMEM before existing the application Compile with diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu index e755c7c..e4f6bcd 100644 --- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu +++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu @@ -355,11 +355,11 @@ int main(int argc, char* argv[]) { compute_stream)); } - //TODO: Replace MPI communication with Host initiated NVSHMEM calls + //TODO: Replace MPI communication with Host initiated NVSHMEM calls // Apply periodic boundary conditions #ifdef SOLUTION - PUSH_RANGE("NVSHMEM", 5) - nvshmemx_float_put_on_stream(a_new + iy_top_lower_boundary_idx * nx, a_new + iy_start * nx, nx, top, push_stream); + PUSH_RANGE("NVSHMEM", 5) + nvshmemx_float_put_on_stream(a_new + iy_top_lower_boundary_idx * nx, a_new + iy_start * nx, nx, top, push_stream); nvshmemx_float_put_on_stream(a_new + iy_bottom_upper_boundary_idx * nx, a_new + (iy_end - 1) * nx, nx, bottom, push_stream); #else PUSH_RANGE("MPI", 5) @@ -369,12 +369,12 @@ int main(int argc, char* argv[]) { MPI_CALL(MPI_Sendrecv(a_new + (iy_end - 1) * nx, nx, MPI_REAL_TYPE, bottom, 0, a_new, nx, MPI_REAL_TYPE, top, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); #endif - CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); + CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); POP_RANGE CUDA_RT_CALL(cudaStreamWaitEvent(compute_stream, push_done, 0)); - //TODO: add necessary inter PE synchronization using the nvshmemx_barrier_all_on_stream(...) + //TODO: add necessary inter PE synchronization using the nvshmemx_barrier_all_on_stream(...) 
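        // Editorial note, not part of the patch: nvshmemx_float_put_on_stream above writes halo
        // rows directly into the neighbouring PE's symmetric memory. The stream-ordered
        // nvshmemx_barrier_all_on_stream ensures all outstanding puts have been delivered and all
        // PEs have reached this point, so no PE starts the next Jacobi iteration reading halo
        // values that have not arrived yet.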
#ifdef SOLUTION nvshmemx_barrier_all_on_stream(compute_stream); #endif @@ -419,9 +419,9 @@ int main(int argc, char* argv[]) { if (rank == 0 && result_correct) { if (csv) { -//TODO: Replace MPI with NVSHMEM for your output + //TODO: Replace MPI with NVSHMEM for your output #ifdef SOLUTION - printf("nvshmem, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, + printf("nvshmem, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, #else printf("mpi, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, #endif diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.ipynb b/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.ipynb index f1f674b..2bd9d62 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.ipynb +++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication\n", "\n", "### Task: Using NCCL\n", "\n", @@ -22,15 +21,14 @@ "variant of the jacobi solver. You need to work on `TODOs` in\n", "`jacobi.cpp`:\n", "\n", - "- Initialize NVSHMEM:\n", - " - Include NCCL headers.\n", - " - Create a NCCL unique ID, and initialize it\n", - " - Create a NCCL communicator and initilize it\n", - " - Replace the MPI_Sendrecv calls with ncclRecv and ncclSend calls\n", - " for the warmup stage\n", - " - Replace MPI for the periodic boundary conditions with NCCL\n", - " - Fix output message to indicate nccl rather than mpi\n", - " - Destroy NCCL comunicator\n", + "- Include NCCL headers.\n", + "- Create a NCCL unique ID, and initialize it\n", + "- Create a NCCL communicator and initialize it\n", + "- Replace the MPI_Sendrecv calls with ncclRecv and ncclSend calls for\n", + " the warmup stage\n", + "- Replace MPI for the periodic boundary conditions with NCCL\n", + "- Fix output message to indicate nccl rather than mpi\n", + "- Destroy NCCL communicator\n", "\n", "Compile with\n", "\n", @@ -48,7 +46,7 @@ "`make profile`. For `make run` and `make profile` the environment\n", "variable `NP` can be set to change the number of processes." 
], - "id": "fcc1dec5-cae8-41fb-a41d-2f5f1e5c5509" + "id": "0179bd70-4d9b-4560-859f-31ecf7708e9e" } ], "nbformat": 4, diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.md b/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.md index e44ea2a..7368f28 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.md +++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication ### Task: Using NCCL @@ -13,14 +12,13 @@ The purpose of this task is to use NCCL instead of MPI to implement a multi-GPU jacobi solver. The starting point of this task is the MPI variant of the jacobi solver. You need to work on `TODOs` in `jacobi.cpp`: -- Initialize NVSHMEM: - - Include NCCL headers. - - Create a NCCL unique ID, and initialize it - - Create a NCCL communicator and initilize it - - Replace the MPI\_Sendrecv calls with ncclRecv and ncclSend calls for the warmup stage - - Replace MPI for the periodic boundary conditions with NCCL - - Fix output message to indicate nccl rather than mpi - - Destroy NCCL comunicator +- Include NCCL headers. +- Create a NCCL unique ID, and initialize it +- Create a NCCL communicator and initialize it +- Replace the MPI\_Sendrecv calls with ncclRecv and ncclSend calls for the warmup stage +- Replace MPI for the periodic boundary conditions with NCCL +- Fix output message to indicate nccl rather than mpi +- Destroy NCCL communicator Compile with diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp index 0b0b018..7b4e5fd 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp +++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp @@ -265,7 +265,7 @@ int main(int argc, char* argv[]) { real* l2_norm_h; CUDA_RT_CALL(cudaMallocHost(&l2_norm_h, sizeof(real))); -//TODO: Rename range + //TODO: Rename range PUSH_RANGE("NCCL_Warmup", 5) for (int i = 0; i < 10; ++i) { const int top = rank > 0 ? rank - 1 : (size - 1); @@ -308,7 +308,7 @@ int main(int argc, char* argv[]) { CUDA_RT_CALL(cudaStreamWaitEvent(push_stream, reset_l2norm_done, 0)); calculate_norm = (iter % nccheck) == 0 || (!csv && (iter % 100) == 0); - launch_jacobi_kernel(a_new, a, l2_norm_d, (iy_start + 1), (iy_end - 1), nx, calculate_norm, + launch_jacobi_kernel(a_new, a, l2_norm_d, (iy_start + 1), (iy_end - 1), nx, calculate_norm, compute_stream); launch_jacobi_kernel(a_new, a, l2_norm_d, iy_start, (iy_start + 1), nx, calculate_norm, @@ -328,7 +328,7 @@ int main(int argc, char* argv[]) { const int bottom = (rank + 1) % size; // Apply periodic boundary conditions - //TODO: Modify the lable for the RANGE, and replace MPI_Sendrecv with ncclSend and ncclRecv calls + //TODO: Modify the lable for the RANGE, and replace MPI_Sendrecv with ncclSend and ncclRecv calls // using the nccl communicator and push_stream. 
// Remember to use ncclGroupStart() and ncclGroupEnd() PUSH_RANGE("NCCL_LAUNCH", 5) @@ -338,7 +338,7 @@ int main(int argc, char* argv[]) { NCCL_CALL(ncclRecv(a_new + (iy_end * nx), nx, NCCL_REAL_TYPE, bottom, nccl_comm, push_stream)); NCCL_CALL(ncclSend(a_new + iy_start * nx, nx, NCCL_REAL_TYPE, top, nccl_comm, push_stream)); NCCL_CALL(ncclGroupEnd()); - CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); + CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); POP_RANGE if (calculate_norm) { @@ -383,9 +383,9 @@ int main(int argc, char* argv[]) { if (rank == 0 && result_correct) { if (csv) { - //TODO: Dont forget to change your output lable from mpi_overlap to nccl_overlap + //TODO: Dont forget to change your output lable from mpi_overlap to nccl_overlap printf("nccl_overlap, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, - (stop - start), runtime_serial); + (stop - start), runtime_serial); } else { printf("Num GPUs: %d.\n", size); printf( diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.ipynb b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.ipynb index b7d479f..c8d814e 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.ipynb +++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM\n", "\n", "### Task: Using NVSHMEM device API\n", "\n", @@ -22,18 +21,17 @@ "is the MPI variant of the jacobi solver. You need to work on `TODOs` in\n", "`jacobi.cu`:\n", "\n", - "- Initialize NVSHMEM:\n", - " - Include NVSHMEM headers.\n", - " - Initialize NVSHMEM using `MPI_COMM_WORLD`.\n", - " - Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric\n", - " heap. Take care of passing in a consistent size!\n", - " - Calculate halo/boundary row index of top and bottom neighbors.\n", - " - Add necessary inter PE synchronization.\n", - " - Replace MPI periodic boundary conditions with\n", - " `nvshmemx_float_put_on_stream` to directly push values needed by\n", - " top and bottom neighbors.\n", - " - Deallocate memory from the NVSHMEM symetric heap.\n", - " - Finalize NVSHMEM before existing the application\n", + "- Include NVSHMEM headers.\n", + "- Initialize NVSHMEM using `MPI_COMM_WORLD`.\n", + "- Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric\n", + " heap. 
Take care of passing in a consistent size!\n", + "- Calculate halo/boundary row index of top and bottom neighbors.\n", + "- Add necessary inter PE synchronization.\n", + "- Replace MPI periodic boundary conditions with\n", + " `nvshmemx_float_put_on_stream` to directly push values needed by top\n", + " and bottom neighbors.\n", + "- Deallocate memory from the NVSHMEM symmetric heap.\n", + "- Finalize NVSHMEM before existing the application\n", "\n", "Compile with\n", "\n", @@ -64,7 +62,7 @@ "GPUs listed. This is automatically done for the `sanitize`, `run` and\n", "`profile` make targets." ], - "id": "c38ceeb2-bbb5-416b-a9ad-dd1dad8ac0e1" + "id": "688b0b98-419c-4dd3-a60f-f1f1d3018e63" } ], "nbformat": 4, diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.md b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.md index 38a3b32..13a5729 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.md +++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM ### Task: Using NVSHMEM device API @@ -13,15 +12,14 @@ The purpose of this task is to use the NVSHMEM host API instead of MPI to implement a multi-GPU jacobi solver. The starting point of this task is the MPI variant of the jacobi solver. You need to work on `TODOs` in `jacobi.cu`: -- Initialize NVSHMEM: - - Include NVSHMEM headers. - - Initialize NVSHMEM using `MPI_COMM_WORLD`. - - Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric heap. Take care of passing in a consistent size! - - Calculate halo/boundary row index of top and bottom neighbors. - - Add necessary inter PE synchronization. - - Replace MPI periodic boundary conditions with `nvshmemx_float_put_on_stream` to directly push values needed by top and bottom neighbors. - - Deallocate memory from the NVSHMEM symetric heap. - - Finalize NVSHMEM before existing the application +- Include NVSHMEM headers. +- Initialize NVSHMEM using `MPI_COMM_WORLD`. +- Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric heap. Take care of passing in a consistent size! +- Calculate halo/boundary row index of top and bottom neighbors. +- Add necessary inter PE synchronization. +- Replace MPI periodic boundary conditions with `nvshmemx_float_put_on_stream` to directly push values needed by top and bottom neighbors. +- Deallocate memory from the NVSHMEM symmetric heap. 
+- Finalize NVSHMEM before existing the application Compile with diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu index d293c62..dd55b30 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu +++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu @@ -342,17 +342,17 @@ int main(int argc, char* argv[]) { compute_stream)); } - //TODO: Replace MPI communication with Host initiated NVSHMEM calls + //TODO: Replace MPI communication with Host initiated NVSHMEM calls // Apply periodic boundary conditions - PUSH_RANGE("NVSHMEM", 5) - nvshmemx_float_put_on_stream(a_new + iy_top_lower_boundary_idx * nx, a_new + iy_start * nx, nx, top, push_stream); + PUSH_RANGE("NVSHMEM", 5) + nvshmemx_float_put_on_stream(a_new + iy_top_lower_boundary_idx * nx, a_new + iy_start * nx, nx, top, push_stream); nvshmemx_float_put_on_stream(a_new + iy_bottom_upper_boundary_idx * nx, a_new + (iy_end - 1) * nx, nx, bottom, push_stream); - CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); + CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); POP_RANGE CUDA_RT_CALL(cudaStreamWaitEvent(compute_stream, push_done, 0)); - //TODO: add necessary inter PE synchronization using the nvshmemx_barrier_all_on_stream(...) + //TODO: add necessary inter PE synchronization using the nvshmemx_barrier_all_on_stream(...) nvshmemx_barrier_all_on_stream(compute_stream); if (calculate_norm) { @@ -395,8 +395,8 @@ int main(int argc, char* argv[]) { if (rank == 0 && result_correct) { if (csv) { -//TODO: Replace MPI with NVSHMEM for your output - printf("nvshmem, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, + //TODO: Replace MPI with NVSHMEM for your output + printf("nvshmem, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, (stop - start), runtime_serial); } else { printf("Num GPUs: %d.\n", size); diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.ipynb b/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.ipynb index d147cf5..2bd9d62 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.ipynb +++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication\n", "\n", "### Task: Using NCCL\n", "\n", @@ -22,15 +21,14 @@ "variant of the jacobi solver. 
You need to work on `TODOs` in\n", "`jacobi.cpp`:\n", "\n", - "- Initialize NCCL:\n", - " - Include NCCL headers.\n", - " - Create a NCCL unique ID, and initialize it\n", - " - Create a NCCL communicator and initilize it\n", - " - Replace the MPI_Sendrecv calls with ncclRecv and ncclSend calls\n", - " for the warmup stage\n", - " - Replace MPI for the periodic boundary conditions with NCCL\n", - " - Fix output message to indicate nccl rather than mpi\n", - " - Destroy NCCL comunicator\n", + "- Include NCCL headers.\n", + "- Create a NCCL unique ID, and initialize it\n", + "- Create a NCCL communicator and initialize it\n", + "- Replace the MPI_Sendrecv calls with ncclRecv and ncclSend calls for\n", + " the warmup stage\n", + "- Replace MPI for the periodic boundary conditions with NCCL\n", + "- Fix output message to indicate nccl rather than mpi\n", + "- Destroy NCCL communicator\n", "\n", "Compile with\n", "\n", @@ -48,7 +46,7 @@ "`make profile`. For `make run` and `make profile` the environment\n", "variable `NP` can be set to change the number of processes." ], - "id": "fcc1dec5-cae8-41fb-a41d-2f5f1e5c5509" + "id": "0179bd70-4d9b-4560-859f-31ecf7708e9e" } ], "nbformat": 4, diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.md b/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.md index fe70698..7368f28 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.md +++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 8-NCCL: Using NCCL for Inter-GPU Communication ### Task: Using NCCL @@ -13,14 +12,13 @@ The purpose of this task is to use NCCL instead of MPI to implement a multi-GPU jacobi solver. The starting point of this task is the MPI variant of the jacobi solver. You need to work on `TODOs` in `jacobi.cpp`: -- Initialize NCCL: - - Include NCCL headers. - - Create a NCCL unique ID, and initialize it - - Create a NCCL communicator and initilize it - - Replace the MPI\_Sendrecv calls with ncclRecv and ncclSend calls for the warmup stage - - Replace MPI for the periodic boundary conditions with NCCL - - Fix output message to indicate nccl rather than mpi - - Destroy NCCL comunicator +- Include NCCL headers. +- Create a NCCL unique ID, and initialize it +- Create a NCCL communicator and initialize it +- Replace the MPI\_Sendrecv calls with ncclRecv and ncclSend calls for the warmup stage +- Replace MPI for the periodic boundary conditions with NCCL +- Fix output message to indicate nccl rather than mpi +- Destroy NCCL communicator Compile with diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp index 3588d21..f99c86a 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp +++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp @@ -251,7 +251,7 @@ int main(int argc, char* argv[]) { real* l2_norm_h; CUDA_RT_CALL(cudaMallocHost(&l2_norm_h, sizeof(real))); -//TODO: Rename range + //TODO: Rename range PUSH_RANGE("MPI_Warmup", 5) for (int i = 0; i < 10; ++i) { const int top = rank > 0 ? 
rank - 1 : (size - 1); @@ -292,7 +292,7 @@ int main(int argc, char* argv[]) { CUDA_RT_CALL(cudaStreamWaitEvent(push_stream, reset_l2norm_done, 0)); calculate_norm = (iter % nccheck) == 0 || (!csv && (iter % 100) == 0); - launch_jacobi_kernel(a_new, a, l2_norm_d, (iy_start + 1), (iy_end - 1), nx, calculate_norm, + launch_jacobi_kernel(a_new, a, l2_norm_d, (iy_start + 1), (iy_end - 1), nx, calculate_norm, compute_stream); launch_jacobi_kernel(a_new, a, l2_norm_d, iy_start, (iy_start + 1), nx, calculate_norm, @@ -312,16 +312,16 @@ int main(int argc, char* argv[]) { const int bottom = (rank + 1) % size; // Apply periodic boundary conditions - //TODO: Modify the lable for the RANGE, and replace MPI_Sendrecv with ncclSend and ncclRecv calls + //TODO: Modify the lable for the RANGE, and replace MPI_Sendrecv with ncclSend and ncclRecv calls // using the nccl communicator and push_stream. // Remember to use ncclGroupStart() and ncclGroupEnd() - PUSH_RANGE("MPI", 5) + PUSH_RANGE("MPI", 5) MPI_CALL(MPI_Sendrecv(a_new + iy_start * nx, nx, MPI_REAL_TYPE, top, 0, a_new + (iy_end * nx), nx, MPI_REAL_TYPE, bottom, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); MPI_CALL(MPI_Sendrecv(a_new + (iy_end - 1) * nx, nx, MPI_REAL_TYPE, bottom, 0, a_new, nx, MPI_REAL_TYPE, top, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); - CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); + CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); POP_RANGE if (calculate_norm) { @@ -366,9 +366,9 @@ int main(int argc, char* argv[]) { if (rank == 0 && result_correct) { if (csv) { - //TODO: Dont forget to change your output lable from mpi_overlap to nccl_overlap - printf("mpi_overlap, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, - (stop - start), runtime_serial); + //TODO: Dont forget to change your output lable from mpi_overlap to nccl_overlap + printf("mpi_overlap, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, + (stop - start), runtime_serial); } else { printf("Num GPUs: %d.\n", size); printf( diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.ipynb b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.ipynb index b7d479f..c8d814e 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.ipynb +++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM\n", "\n", "### Task: Using NVSHMEM device API\n", "\n", @@ -22,18 +21,17 @@ "is the MPI variant of the jacobi solver. You need to work on `TODOs` in\n", "`jacobi.cu`:\n", "\n", - "- Initialize NVSHMEM:\n", - " - Include NVSHMEM headers.\n", - " - Initialize NVSHMEM using `MPI_COMM_WORLD`.\n", - " - Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric\n", - " heap. 
Take care of passing in a consistent size!\n", - " - Calculate halo/boundary row index of top and bottom neighbors.\n", - " - Add necessary inter PE synchronization.\n", - " - Replace MPI periodic boundary conditions with\n", - " `nvshmemx_float_put_on_stream` to directly push values needed by\n", - " top and bottom neighbors.\n", - " - Deallocate memory from the NVSHMEM symetric heap.\n", - " - Finalize NVSHMEM before existing the application\n", + "- Include NVSHMEM headers.\n", + "- Initialize NVSHMEM using `MPI_COMM_WORLD`.\n", + "- Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric\n", + " heap. Take care of passing in a consistent size!\n", + "- Calculate halo/boundary row index of top and bottom neighbors.\n", + "- Add necessary inter PE synchronization.\n", + "- Replace MPI periodic boundary conditions with\n", + " `nvshmemx_float_put_on_stream` to directly push values needed by top\n", + " and bottom neighbors.\n", + "- Deallocate memory from the NVSHMEM symmetric heap.\n", + "- Finalize NVSHMEM before existing the application\n", "\n", "Compile with\n", "\n", @@ -64,7 +62,7 @@ "GPUs listed. This is automatically done for the `sanitize`, `run` and\n", "`profile` make targets." ], - "id": "c38ceeb2-bbb5-416b-a9ad-dd1dad8ac0e1" + "id": "688b0b98-419c-4dd3-a60f-f1f1d3018e63" } ], "nbformat": 4, diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.md b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.md index 38a3b32..13a5729 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.md +++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 8-NVSHMEM: Host-initiated Communication with NVSHMEM ### Task: Using NVSHMEM device API @@ -13,15 +12,14 @@ The purpose of this task is to use the NVSHMEM host API instead of MPI to implement a multi-GPU jacobi solver. The starting point of this task is the MPI variant of the jacobi solver. You need to work on `TODOs` in `jacobi.cu`: -- Initialize NVSHMEM: - - Include NVSHMEM headers. - - Initialize NVSHMEM using `MPI_COMM_WORLD`. - - Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric heap. Take care of passing in a consistent size! - - Calculate halo/boundary row index of top and bottom neighbors. - - Add necessary inter PE synchronization. - - Replace MPI periodic boundary conditions with `nvshmemx_float_put_on_stream` to directly push values needed by top and bottom neighbors. - - Deallocate memory from the NVSHMEM symetric heap. - - Finalize NVSHMEM before existing the application +- Include NVSHMEM headers. +- Initialize NVSHMEM using `MPI_COMM_WORLD`. +- Allocate work arrays `a` and `a_new` from the NVSHMEM symmetric heap. Take care of passing in a consistent size! +- Calculate halo/boundary row index of top and bottom neighbors. +- Add necessary inter PE synchronization. +- Replace MPI periodic boundary conditions with `nvshmemx_float_put_on_stream` to directly push values needed by top and bottom neighbors. +- Deallocate memory from the NVSHMEM symmetric heap. 
+- Finalize NVSHMEM before existing the application Compile with diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu index 0359d7e..b754207 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu +++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu @@ -333,7 +333,7 @@ int main(int argc, char* argv[]) { compute_stream)); } - //TODO: Replace MPI communication with Host initiated NVSHMEM calls + //TODO: Replace MPI communication with Host initiated NVSHMEM calls // Apply periodic boundary conditions PUSH_RANGE("MPI", 5) MPI_CALL(MPI_Sendrecv(a_new + iy_start * nx, nx, MPI_REAL_TYPE, top, 0, @@ -341,12 +341,12 @@ int main(int argc, char* argv[]) { MPI_STATUS_IGNORE)); MPI_CALL(MPI_Sendrecv(a_new + (iy_end - 1) * nx, nx, MPI_REAL_TYPE, bottom, 0, a_new, nx, MPI_REAL_TYPE, top, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); - CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); + CUDA_RT_CALL(cudaEventRecord(push_done, push_stream)); POP_RANGE CUDA_RT_CALL(cudaStreamWaitEvent(compute_stream, push_done, 0)); - //TODO: add necessary inter PE synchronization using the nvshmemx_barrier_all_on_stream(...) + //TODO: add necessary inter PE synchronization using the nvshmemx_barrier_all_on_stream(...) if (calculate_norm) { CUDA_RT_CALL(cudaStreamSynchronize(compute_stream)); @@ -388,7 +388,7 @@ int main(int argc, char* argv[]) { if (rank == 0 && result_correct) { if (csv) { -//TODO: Replace MPI with NVSHMEM for your output + //TODO: Replace MPI with NVSHMEM for your output printf("mpi, %d, %d, %d, %d, %d, 1, %f, %f\n", nx, ny, iter_max, nccheck, size, (stop - start), runtime_serial); } else { diff --git a/09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides.pdf b/09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides.pdf index 60b761b..8095d4a 100644 Binary files a/09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides.pdf and b/09-L_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/slides.pdf differ diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb index ebc5ea5..d0c4c4a 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# SC22 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Monday, 14 November 2022 8:30AM - 5PM CST\n", - "- Location: D163, Kay Bailey Hutchison Convention Center Dallas\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://sc22.supercomputing.org/presentation/?id=tut102&sess=sess196\n", - "\n", - "## Hands-On 10A: Device-initiated Communication with NVSHMEM\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 10A: Device-initiated Communication with NVSHMEM\n", "\n", "### Task: Using NVSHMEM device API\n", "\n", @@ -86,7 +85,7 @@ " 
repository](https://github.com/NVIDIA/multi-gpu-programming-models)\n", " implements the same strategy." ], - "id": "57748efe-edc3-44ae-9321-54c897d82a47" + "id": "3d1de1bf-0ebb-4709-bddc-9e34484b03bb" } ], "nbformat": 4, diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.md b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.md index 7b31752..2217166 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.md +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 10A: Device-initiated Communication with NVSHMEM ### Task: Using NVSHMEM device API diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk index df1bb37..fdc71c0 100755 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/copy.mk @@ -1,7 +1,6 @@ #!/usr/bin/make -f # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
-TASKDIR = ../../tasks/Device-initiated_Communication_with_NVSHM -EM +TASKDIR = ../../tasks/Device-initiated_Communication_with_NVSHMEM SOLUTIONDIR = ../../solutions/Device-initiated_Communication_with_NVSHMEM IYPNB_TEMPLATE = ../../../.template.json diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.ipynb b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.ipynb index cd06caa..8c9afe1 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.ipynb +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 10B: Using CUDA Graphs\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 10B: Using CUDA Graphs\n", "\n", "### Task: Combining CUDA Graphs with NCCL for Inter-GPU Communication\n", "\n", @@ -65,7 +64,7 @@ "`make profile`. For `make run` and `make profile` the environment\n", "variable `NP` can be set to change the number of processes." ], - "id": "4865b2a9-edfd-40e8-879d-f503b5b30fd5" + "id": "bc28bb54-3dda-4b0a-9465-f6609051d0cc" } ], "nbformat": 4, diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.md b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.md index 2e3645a..b6e9534 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.md +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 10B: Using CUDA Graphs ### Task: Combining CUDA Graphs with NCCL for Inter-GPU Communication diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb index ebc5ea5..d0c4c4a 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb +++ 
b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# SC22 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Monday, 14 November 2022 8:30AM - 5PM CST\n", - "- Location: D163, Kay Bailey Hutchison Convention Center Dallas\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://sc22.supercomputing.org/presentation/?id=tut102&sess=sess196\n", - "\n", - "## Hands-On 10A: Device-initiated Communication with NVSHMEM\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 10A: Device-initiated Communication with NVSHMEM\n", "\n", "### Task: Using NVSHMEM device API\n", "\n", @@ -86,7 +85,7 @@ " repository](https://github.com/NVIDIA/multi-gpu-programming-models)\n", " implements the same strategy." ], - "id": "57748efe-edc3-44ae-9321-54c897d82a47" + "id": "3d1de1bf-0ebb-4709-bddc-9e34484b03bb" } ], "nbformat": 4, diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.md b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.md index 7b31752..2217166 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.md +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 10A: Device-initiated Communication with NVSHMEM ### Task: Using NVSHMEM device API diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.ipynb b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.ipynb index cd06caa..8c9afe1 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.ipynb +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", 
- "\n", - "## Hands-On 10B: Using CUDA Graphs\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 10B: Using CUDA Graphs\n", "\n", "### Task: Combining CUDA Graphs with NCCL for Inter-GPU Communication\n", "\n", @@ -65,7 +64,7 @@ "`make profile`. For `make run` and `make profile` the environment\n", "variable `NP` can be set to change the number of processes." ], - "id": "4865b2a9-edfd-40e8-879d-f503b5b30fd5" + "id": "bc28bb54-3dda-4b0a-9465-f6609051d0cc" } ], "nbformat": 4, diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.md b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.md index 2e3645a..b6e9534 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.md +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 10B: Using CUDA Graphs ### Task: Combining CUDA Graphs with NCCL for Inter-GPU Communication diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb index ebc5ea5..d0c4c4a 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# SC22 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Monday, 14 November 2022 8:30AM - 5PM CST\n", - "- Location: D163, Kay Bailey Hutchison Convention Center Dallas\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://sc22.supercomputing.org/presentation/?id=tut102&sess=sess196\n", - "\n", - "## Hands-On 10A: Device-initiated Communication with NVSHMEM\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 10A: Device-initiated Communication with NVSHMEM\n", "\n", "### Task: Using NVSHMEM device API\n", "\n", @@ -86,7 +85,7 @@ " repository](https://github.com/NVIDIA/multi-gpu-programming-models)\n", " implements the same strategy." 
], - "id": "57748efe-edc3-44ae-9321-54c897d82a47" + "id": "3d1de1bf-0ebb-4709-bddc-9e34484b03bb" } ], "nbformat": 4, diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.md b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.md index 7b31752..2217166 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.md +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 10A: Device-initiated Communication with NVSHMEM ### Task: Using NVSHMEM device API diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.ipynb b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.ipynb index cd06caa..8c9afe1 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.ipynb +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.ipynb @@ -4,14 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", + "# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale\n", "\n", - "- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST\n", - "- Location: Hall Y8, Congress Center Hamburg, Germany\n", + "- Time: Monday, 13 November 2023 8:30 - 17:00 MST\n", + "- Location: 405, Denver Congress Centre, USA\n", "- Program Link:\n", - " https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==\n", - "\n", - "## Hands-On 10B: Using CUDA Graphs\n", + " https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242\n", + " \\## Hands-On 10B: Using CUDA Graphs\n", "\n", "### Task: Combining CUDA Graphs with NCCL for Inter-GPU Communication\n", "\n", @@ -65,7 +64,7 @@ "`make profile`. For `make run` and `make profile` the environment\n", "variable `NP` can be set to change the number of processes." 
], - "id": "4865b2a9-edfd-40e8-879d-f503b5b30fd5" + "id": "bc28bb54-3dda-4b0a-9465-f6609051d0cc" } ], "nbformat": 4, diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.md b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.md index 2e3645a..b6e9534 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.md +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Instructions.md @@ -1,10 +1,9 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale -- Time: Sunday, 21 May 2023 9:00 - 18:00 CEST -- Location: Hall Y8, Congress Center Hamburg, Germany +- Time: Monday, 13 November 2023 8:30 - 17:00 MST +- Location: 405, Denver Congress Centre, USA - Program Link: - https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA== - + https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242 ## Hands-On 10B: Using CUDA Graphs ### Task: Combining CUDA Graphs with NCCL for Inter-GPU Communication diff --git a/11-L_Summary_Advanced/slides.pdf b/11-L_Summary_Advanced/slides.pdf index 1914e74..20ec4d8 100644 Binary files a/11-L_Summary_Advanced/slides.pdf and b/11-L_Summary_Advanced/slides.pdf differ diff --git a/README.md b/README.md index 2deed2f..4bbc5c9 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,40 @@ -# ISC23 Tutorial: Efficient Distributed GPU Programming for Exascale +# SC23 Tutorial: Efficient Distributed GPU Programming for Exascale [![DOI](https://zenodo.org/badge/409504932.svg)](https://zenodo.org/badge/latestdoi/409504932) -Repository with talks and exercises of our Efficient GPU Programming for Exascale tutorial, to be held at [ISC23](https://app.swapcard.com/widget/event/isc-high-performance-2023/planning/UGxhbm5pbmdfMTIyMDc5OA==). +Repository with talks and exercises of our Efficient GPU Programming for Exascale tutorial, to be held at [SC23](https://sc23.supercomputing.org/presentation/?id=tut140&sess=sess242). ## Coordinates -* Date: 21 May 2023 -* Occasion: ISC23 Tutorial +* Date: 13 November 2023 +* Occasion: SC23 Tutorial * Tutors: Simon Garcia (SNL), Andreas Herten (JSC), Markus Hrywniak (NVIDIA), Jiri Kraus (NVIDIA), Lena Oden (Uni Hagen) + ## Setup The tutorial is an interactive tutorial with introducing lectures and practical exercises to apply knowledge. The exercises have been derived from the Jacobi solver implementations available in [NVIDIA/multi-gpu-programming-models](https://github.com/NVIDIA/multi-gpu-programming-models). +Walk-through: + +* Sign up at JuDoor: https://go.fzj.de/mg-jd +* Open Jupyter JSC: https://jupyter-jsc.fz-juelich.de +* Create new Jupyter instance on JUWELS, using training2332 account, on **LoginNodeBooster** +* Source course environment: `source $PROJECT_training2332/env.sh` +* Sync material: `jsc-material-sync` +* Locally install NVIDIA Nsight Systems: https://developer.nvidia.com/nsight-systems + Curriculum: 1. Lecture: Tutorial Overview, Introduction to System + Onboarding *Andreas* 2. Lecture: MPI-Distributed Computing with GPUs *Simon* 3. Hands-on: Multi-GPU Parallelization 4. Lecture: Performance / Debugging Tools *Markus* -5. Lecture: Optimization Techniques for Multi-GPU Applications *Markus* +5. Lecture: Optimization Techniques for Multi-GPU Applications *Simon* 6. 
Hands-on: Overlap Communication and Computation with MPI -7. Lecture: Overview of NCCL and NVSHMEN in MPI *Lena* +7. Lecture: Overview of NCCL and NVSHMEM in MPI *Jiri* 8. Hands-on: Using NCCL and NVSHMEM -9. Lecture: Device-initiated Communication with NVSHMEM *Lena* +9. Lecture: Device-initiated Communication with NVSHMEM *Jiri* 10. Hands-on: Using Device-Initiated Communication with NVSHMEM 11. Lecture: Conclusion and Outline of Advanced Topics *Andreas*
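For orientation, curriculum item 6 ("Overlap Communication and Computation with MPI") boils down to the pattern below. This is an editor-added sketch rather than material from the hands-ons: it assumes CUDA-aware MPI and the row layout of the Jacobi exercises, and `launch_jacobi_kernel` appears only in comments because its signature lives in the exercise code.

```cuda
#include <mpi.h>
#include <cuda_runtime.h>

void jacobi_step_overlapped(float* a, float* a_new, int nx, int iy_start, int iy_end,
                            int top, int bottom, cudaStream_t compute_stream,
                            cudaStream_t push_stream) {
    // 1) Boundary rows on push_stream, interior on compute_stream (runs concurrently).
    // launch_jacobi_kernel(..., iy_start, iy_start + 1, nx, ..., push_stream);
    // launch_jacobi_kernel(..., iy_end - 1, iy_end, nx, ..., push_stream);
    // launch_jacobi_kernel(..., iy_start + 1, iy_end - 1, nx, ..., compute_stream);

    // 2) Wait only for the boundary rows, then exchange halos with (CUDA-aware)
    //    MPI while the interior kernel is still running.
    cudaStreamSynchronize(push_stream);
    MPI_Sendrecv(a_new + iy_start * nx, nx, MPI_FLOAT, top, 0,
                 a_new + iy_end * nx, nx, MPI_FLOAT, bottom, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Sendrecv(a_new + (iy_end - 1) * nx, nx, MPI_FLOAT, bottom, 0,
                 a_new, nx, MPI_FLOAT, top, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    // 3) The interior finishes in the background; synchronize before the next
    //    iteration swaps a and a_new.
    cudaStreamSynchronize(compute_stream);
}
```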