diff --git a/.github/workflows/sync_hf_space.yaml b/.github/workflows/sync_hf_space.yaml
index e1c09a5..68befff 100644
--- a/.github/workflows/sync_hf_space.yaml
+++ b/.github/workflows/sync_hf_space.yaml
@@ -1,6 +1,9 @@
 name: Sync to Hugging Face Space
 
 on:
+  release:
+    types: [published]
+
   workflow_dispatch:
 
 jobs:
@@ -10,3 +13,29 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
+
+      - run: git clone https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mozilla-ai/structured-qa hf-space
+
+      - run: |
+          cp demo/app.py hf-space/app.py
+          cp demo/Dockerfile hf-space/Dockerfile
+
+      - run: |
+          cd hf-space
+          git config user.name 'github-actions[bot]'
+          git config user.email 'github-actions[bot]@users.noreply.github.com'
+          git add .
+          git commit -m "Sync with https://github.com/mozilla-ai/structured-qa"
+
+      - name: Push to Hugging Face
+        run: |
+          cd hf-space
+          git push https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mozilla-ai/structured-qa main
+
+      - name: Reboot Space
+        if: always()
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          pip install huggingface_hub
+          python demo/reboot_space.py
diff --git a/demo/Dockerfile b/demo/Dockerfile
new file mode 100644
index 0000000..8316a78
--- /dev/null
+++ b/demo/Dockerfile
@@ -0,0 +1,26 @@
+FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
+
+RUN apt-get update && apt-get install --no-install-recommends -y \
+    build-essential \
+    python3.10 \
+    python3.10-dev \
+    python3-pip \
+    git \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -u 1000 user
+
+USER user
+
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+RUN pip3 install https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
+RUN pip3 install structured-qa
+
+COPY --chown=user . $HOME/app
+
+EXPOSE 8501
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.enableXsrfProtection", "false"]
diff --git a/demo/README.md b/demo/README.md
new file mode 100644
index 0000000..31fd149
--- /dev/null
+++ b/demo/README.md
@@ -0,0 +1,11 @@
+---
+title: Structured Qa
+emoji: 📚
+colorFrom: green
+colorTo: purple
+sdk: docker
+app_port: 8501
+pinned: false
+license: apache-2.0
+short_description: Question answering for structured documents
+---
diff --git a/demo/reboot_space.py b/demo/reboot_space.py
new file mode 100644
index 0000000..597095a
--- /dev/null
+++ b/demo/reboot_space.py
@@ -0,0 +1,11 @@
+import os
+
+from huggingface_hub import HfApi
+
+if __name__ == "__main__":
+    api = HfApi()
+    api.restart_space(
+        repo_id="mozilla-ai/structured-qa",
+        token=os.getenv("HF_TOKEN"),
+        factory_reboot=True,
+    )
diff --git a/demo/run.sh b/demo/run.sh
deleted file mode 100755
index bad3e42..0000000
--- a/demo/run.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-# Adapted from https://docs.streamlit.io/deploy/tutorials/kubernetes
-
-APP_PID=
-stopRunningProcess() {
-    # Based on https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
-    if test ! "${APP_PID}" = '' && ps -p ${APP_PID} > /dev/null ; then
-       > /proc/1/fd/1 echo "Stopping ${COMMAND_PATH} which is running with process ID ${APP_PID}"
-
-       kill -TERM ${APP_PID}
-       > /proc/1/fd/1 echo "Waiting for ${COMMAND_PATH} to process SIGTERM signal"
-
-       wait ${APP_PID}
-       > /proc/1/fd/1 echo "All processes have stopped running"
-    else
-       > /proc/1/fd/1 echo "${COMMAND_PATH} was not started when the signal was sent or it has already been stopped"
-    fi
-}
-
-trap stopRunningProcess EXIT TERM
-
-streamlit run ${HOME}/document-to-podcast/demo/app.py &
-APP_ID=${!}
-
-wait ${APP_ID}
diff --git a/src/structured_qa/model_loaders.py b/src/structured_qa/model_loaders.py
index 62fbc05..8a6d587 100644
--- a/src/structured_qa/model_loaders.py
+++ b/src/structured_qa/model_loaders.py
@@ -1,7 +1,15 @@
-import torch
+import subprocess
 
 from llama_cpp import Llama
 
+def gpu_available():
+    try:
+        subprocess.check_output("nvidia-smi")
+        return True
+    except Exception:
+        return False
+
+
 def load_llama_cpp_model(model_id: str) -> Llama:
     """
     Loads the given model_id using Llama.from_pretrained.
@@ -22,6 +30,6 @@ def load_llama_cpp_model(model_id: str) -> Llama:
         filename=filename,
         n_ctx=0,  # 0 means that the model limit will be used, instead of the default (512) or other hardcoded value
         verbose=False,
-        n_gpu_layers=-1 if torch.cuda.is_available() else 0,
+        n_gpu_layers=-1 if gpu_available() else 0,
     )
     return model