diff --git a/.ci/docker/Dockerfile b/.ci/docker/Dockerfile
new file mode 100644
index 00000000000..8aefbfe8f47
--- /dev/null
+++ b/.ci/docker/Dockerfile
@@ -0,0 +1,25 @@
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE}
+
+ENV DEBIAN_FRONTEND noninteractive
+
+# Install common dependencies (so that this step can be cached separately)
+COPY ./common/install_base.sh install_base.sh
+RUN bash ./install_base.sh && rm install_base.sh
+
+# Setup user
+# TODO: figure out how to remove this part
+COPY ./common/install_user.sh install_user.sh
+RUN bash ./install_user.sh && rm install_user.sh
+
+COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
+RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
+
+COPY ./common/install_pip_requirements.sh install_pip_requirements.sh
+COPY ./requirements.txt requirements.txt
+RUN bash ./install_pip_requirements.sh && rm install_pip_requirements.sh
+
+RUN ln -s /usr/bin/python3 /usr/bin/python
+
+USER ci-user
+CMD ["bash"]
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
new file mode 100755
index 00000000000..f40c45fea3d
--- /dev/null
+++ b/.ci/docker/build.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+IMAGE_NAME="$1"
+shift
+
+export UBUNTU_VERSION="22.04"
+export CUDA_VERSION="12.6.3"
+
+export BASE_IMAGE="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
+echo "Building ${IMAGE_NAME} Docker image"
+
+docker build \
+  --no-cache \
+  --progress=plain \
+  -f Dockerfile \
+  --build-arg BASE_IMAGE="${BASE_IMAGE}" \
+  "$@" \
+  .
diff --git a/.ci/docker/common/install_base.sh b/.ci/docker/common/install_base.sh
new file mode 100644
index 00000000000..3100b550a89
--- /dev/null
+++ b/.ci/docker/common/install_base.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Based off of https://github.com/pytorch/pytorch/tree/b52e0bf131a4e55cd987176f9c5a8d2ad6783b4f/.ci/docker
+
+set -ex
+
+install_ubuntu() {
+  # Install common dependencies
+  apt-get update
+  # TODO: Some of these may not be necessary
+  apt-get install -y --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    cmake=3.22* \
+    curl \
+    git \
+    wget \
+    sudo \
+    vim \
+    jq \
+    vim \
+    unzip \
+    gdb \
+    rsync \
+    libssl-dev \
+    p7zip-full \
+    libglfw3 \
+    libglfw3-dev \
+    sox \
+    libsox-dev \
+    libsox-fmt-all \
+    python3-pip \
+    python3-dev
+
+  # Cleanup package manager
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+# Install base packages depending on the base OS
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    install_ubuntu
+    ;;
+  *)
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
diff --git a/.ci/docker/common/install_docs_reqs.sh b/.ci/docker/common/install_docs_reqs.sh
new file mode 100644
index 00000000000..541c9976ad1
--- /dev/null
+++ b/.ci/docker/common/install_docs_reqs.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Based off of https://github.com/pytorch/pytorch/tree/b52e0bf131a4e55cd987176f9c5a8d2ad6783b4f/.ci/docker
+set -ex
+
+apt-get update
+apt-get install -y gpg-agent
+
+curl --retry 3 -sL https://deb.nodesource.com/setup_20.x | sudo -E bash -
+sudo apt-get install -y nodejs
+
+curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
+echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
+
+apt-get update
+apt-get install -y --no-install-recommends yarn
+yarn global add katex --prefix /usr/local
+
+sudo apt-get -y install doxygen
+
+apt-get autoclean && apt-get clean
+rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
diff --git a/.ci/docker/common/install_pip_requirements.sh b/.ci/docker/common/install_pip_requirements.sh
new file mode 100644
index 00000000000..a548d200462
--- /dev/null
+++ b/.ci/docker/common/install_pip_requirements.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -ex
+
+# Install pip packages
+pip install --upgrade pip
+pip install -r ./requirements.txt
diff --git a/.ci/docker/common/install_user.sh b/.ci/docker/common/install_user.sh
new file mode 100644
index 00000000000..6deb62086bc
--- /dev/null
+++ b/.ci/docker/common/install_user.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Copied from https://github.com/pytorch/executorch/blob/6e431355a554e5f84c3a05dfa2b981ead90c2b48/.ci/docker/common/install_user.sh#L1
+
+set -ex
+
+# Same as ec2-user
+echo "ci-user:x:1000:1000::/var/lib/ci-user:" >> /etc/passwd
+echo "ci-user:x:1000:" >> /etc/group
+# Needed on Focal or newer
+echo "ci-user:*:19110:0:99999:7:::" >> /etc/shadow
+
+# Create $HOME
+mkdir -p /var/lib/ci-user
+chown ci-user:ci-user /var/lib/ci-user
+
+# Allow sudo
+echo 'ci-user ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ci-user
+
+# Test that sudo works
+sudo -u ci-user sudo -v
diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt
new file mode 100644
index 00000000000..964f9aad4ab
--- /dev/null
+++ b/.ci/docker/requirements.txt
@@ -0,0 +1,78 @@
+# --extra-index-url https://download.pytorch.org/whl/cu117/index.html # Use this to run/publish tutorials against the latest binaries during the RC stage. Comment out after the release. Each release verify the correct cuda version.
+# Refer to .jenkins/build.sh for tutorial build instructions.
+
+# Sphinx dependencies
+sphinx==7.2.6
+sphinx-gallery==0.19.0
+sphinx-reredirects==0.1.4
+sphinx_design==0.6.1
+docutils>=0.18.1,<0.21
+sphinx-copybutton==0.5.2
+sphinx_sitemap==2.7.1
+sphinxcontrib-mermaid==1.0.0
+sphinxcontrib.katex==0.9.10
+pypandoc==1.15
+pandocfilters==1.5.1
+markdown==3.8.2
+
+# PyTorch Theme
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@c2e38b37f3c432c610639f06d1d421c6df4c225c#egg=pytorch_sphinx_theme2
+
+# Tutorial dependencies
+tqdm==4.66.1
+numpy==1.24.4
+matplotlib
+librosa
+torch==2.8
+torchvision
+torchdata
+networkx
+PyHamcrest
+bs4
+awscliv2==2.1.1
+flask
+spacy==3.4.1
+ray[tune]==2.7.2
+tensorboard
+jinja2==3.1.3
+pytorch-lightning
+torchx
+torchrl==0.9.2
+tensordict==0.9.1
+# For ax_multiobjective_nas_tutorial.py
+ax-platform>=0.4.0,<0.5.0
+nbformat>=5.9.2
+datasets
+transformers
+onnx
+onnxscript>=0.2.2
+onnxruntime
+evaluate
+accelerate>=0.20.1
+
+importlib-metadata==6.8.0
+
+ipython
+
+sphinxcontrib.katex
+# to run examples
+boto3
+pandas
+requests
+scikit-image
+scipy==1.11.1
+numba==0.57.1
+pillow==10.2.0
+wget
+gym==0.26.2
+gym-super-mario-bros==7.4.0
+pyopengl
+gymnasium[mujoco]==0.27.0
+timm
+pygame==2.6.0
+pycocotools
+semilearn==0.3.2
+torchao==0.10.0
+segment_anything==1.0
+torchrec==1.2.0; platform_system == "Linux"
+fbgemm-gpu==1.2.0; platform_system == "Linux"
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 00000000000..4928e536acf
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,8 @@
+FROM mcr.microsoft.com/vscode/devcontainers/python:3.8
+
+COPY requirements.txt /tmp/pip-tmp/
+
+RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get install git gcc unzip make -y \
+    && pip3 install --disable-pip-version-check --no-cache-dir -r /tmp/pip-tmp/requirements.txt \
+    && rm -rf /tmp/pip-tmp
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000000..86fe20483c5
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,18 @@
+{
+  "name": "PyTorch Tutorials",
+  "build": {
+    "context": "..",
+    "dockerfile": "Dockerfile",
+    "args": {}
+  },
+  "settings": {
+    "terminal.integrated.shell.linux": "/bin/bash",
+    "workbench.startupEditor": "none",
+    "files.autoSave": "afterDelay",
+    "python.dataScience.enabled": true,
+    "python.dataScience.alwaysTrustNotebooks": true,
+    "python.insidersChannel": "weekly",
+    "python.showStartPage": false
+  },
+  "extensions": ["ms-python.python", "lextudio.restructuredtext"]
+}
diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt
new file mode 100644
index 00000000000..2be1df895be
--- /dev/null
+++ b/.devcontainer/requirements.txt
@@ -0,0 +1,31 @@
+# Refer to .jenkins/build.sh for tutorial build instructions
+
+sphinx==1.8.2
+sphinx-gallery==0.3.1
+tqdm
+numpy
+matplotlib
+torch
+torchvision
+torchtext
+torchaudio
+PyHamcrest
+bs4
+awscli==1.16.35
+flask
+spacy
+ray[tune]
+
+# PyTorch Theme
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
+
+ipython
+
+# to run examples
+pandas
+scikit-image
+pillow==10.3.0
+wget
+
+# for codespaces env
+pylint
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
new file mode 100644
index 00000000000..937417f4999
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,60 @@
+name: 🐛 Bug Report
+description: Create a tutorial bug report
+title: "[BUG] -
+        # Escape the code and wrap it in <code> tags
+        return f'<code>{html.escape(item["c"][1])}</code>'
+    elif item['t'] == 'CodeBlock':
+        # Escape the code block and wrap it in <pre><code> tags
+        return f'<pre><code>{html.escape(item["c"][1])}</code></pre>'
+    else:
+        return ''
+
+
+def process_admonitions(key, value, format, meta):
+ # Replace admonitions with proper HTML.
+ if key == 'Div':
+ [[ident, classes, keyvals], contents] = value
+ if 'note' in classes:
+ color = '#54c7ec'
+ label = 'NOTE:'
+ elif 'tip' in classes:
+ color = '#6bcebb'
+ label = 'TIP:'
+ elif 'warning' in classes:
+ color = '#e94f3b'
+ label = 'WARNING:'
+ else:
+ return
+
+ note_content = []
+ for block in contents:
+ if block.get('t') == 'Para':
+ for item in block['c']:
+ if item['t'] == 'Str':
+ note_content.append(Str(item['c']))
+ elif item['t'] == 'Space':
+ note_content.append(Space())
+ elif item['t'] == 'Link':
+ note_content.append(Link(*item['c']))
+ elif item['t'] == 'Code':
+ note_content.append(Code(*item['c']))
+ elif block.get('t') == 'CodeBlock':
+ note_content.append(CodeBlock(*block['c']))
+
+ note_content_md = ''.join(to_markdown(item) for item in note_content)
+ html_content = markdown.markdown(note_content_md)
+
+        return [{'t': 'RawBlock', 'c': ['html', f'<div style="background-color: {color}"><strong>{label}</strong></div>']}, {'t': 'RawBlock', 'c': ['html', '<div>']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '</div>']}]
+ elif key == 'RawBlock':
+ # this is needed for the cells that have embedded video.
+ # We add a special tag to those: ``` {python, .jupyter-code-cell}
+        # The post-processing script then finds those and generates separate
+ # code cells that can load video.
+ [format, content] = value
+ if format == 'html' and 'iframe' in content:
+ # Extract the video URL
+ video_url = content.split('src="')[1].split('"')[0]
+ # Create the Python code to display the video
+ python_code = f"""
+from IPython.display import display, HTML
+html_code = \"""
+{content}
+\"""
+display(HTML(html_code))
+"""
+
+ return {'t': 'CodeBlock', 'c': [['', ['python', 'jupyter-code-cell'], []], python_code]}
+
+
+def process_images(key, value, format, meta):
+ # Add https://pytorch.org/tutorials/ to images so that they
+ # load correctly in the notebook.
+ if key != 'Image':
+ return None
+ [ident, classes, keyvals], caption, [src, title] = value
+ if not src.startswith('http'):
+ while src.startswith('../'):
+ src = src[3:]
+ if src.startswith('/_static'):
+ src = src[1:]
+ src = 'https://pytorch.org/tutorials/' + src
+
+ return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]}
+
+
+def process_grids(key, value, format, meta):
+ # Generate side by side grid cards. Only for the two-cards layout
+ # that we use in the tutorial template.
+ if key == 'Div':
+ [[ident, classes, keyvals], contents] = value
+ if 'grid' in classes:
+            columns = ['<div style="width: 50%; float: left; padding-right: 10px;">',
+                       '<div style="width: 50%; float: right;">']
+            column_num = 0
+            for block in contents:
+                if 't' in block and block['t'] == 'Div' and 'grid-item-card' in block['c'][0][1]:
+                    item_html = ''
+                    for item in block['c'][1]:
+                        if item['t'] == 'Para':
+                            item_html += '<p>' + ''.join(to_markdown(i) for i in item['c']) + '</p>'
+                        elif item['t'] == 'BulletList':
+                            item_html += '<ul>'
+                            for list_item in item['c']:
+                                item_html += '<li>' + ''.join(to_markdown(i) for i in list_item[0]['c']) + '</li>'
+                            item_html += '</ul>'
+                    columns[column_num] += item_html
+                    column_num = (column_num + 1) % 2
+            columns = [column + '</div>' for column in columns]
+            return {'t': 'RawBlock', 'c': ['html', ''.join(columns)]}
+
+def is_code_block(item):
+ return item['t'] == 'Code' and 'octicon' in item['c'][1]
+
+
+def process_all(key, value, format, meta):
+ for transform in [process_admonitions, process_images, process_grids]:
+ new_value = transform(key, value, format, meta)
+ if new_value is not None:
+ break
+ return new_value
+
+
+if __name__ == "__main__":
+ toJSONFilter(process_all)
diff --git a/.jenkins/delete_html_file_with_runnable_code_removed.py b/.jenkins/delete_html_file_with_runnable_code_removed.py
new file mode 100644
index 00000000000..b84a0ecd92e
--- /dev/null
+++ b/.jenkins/delete_html_file_with_runnable_code_removed.py
@@ -0,0 +1,11 @@
+import sys
+import os
+
+html_file_path = sys.argv[1]
+
+with open(html_file_path, 'r', encoding='utf-8') as html_file:
+ html = html_file.read()
+
+if "%%%%%%RUNNABLE_CODE_REMOVED%%%%%%" in html:
+ print("Removing " + html_file_path)
+ os.remove(html_file_path)
diff --git a/.jenkins/download_data.py b/.jenkins/download_data.py
new file mode 100644
index 00000000000..939e63fc7a8
--- /dev/null
+++ b/.jenkins/download_data.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+import hashlib
+import os
+
+from typing import Optional
+from urllib.request import urlopen, Request
+from pathlib import Path
+from zipfile import ZipFile
+
+REPO_BASE_DIR = Path(__file__).absolute().parent.parent
+DATA_DIR = REPO_BASE_DIR / "_data"
+BEGINNER_DATA_DIR = REPO_BASE_DIR / "beginner_source" / "data"
+INTERMEDIATE_DATA_DIR = REPO_BASE_DIR / "intermediate_source" / "data"
+ADVANCED_DATA_DIR = REPO_BASE_DIR / "advanced_source" / "data"
+PROTOTYPE_DATA_DIR = REPO_BASE_DIR / "unstable_source" / "data"
+FILES_TO_RUN = os.getenv("FILES_TO_RUN")
+
+
+def size_fmt(nbytes: int) -> str:
+ """Returns a formatted file size string"""
+ KB = 1024
+ MB = 1024 * KB
+ GB = 1024 * MB
+ if abs(nbytes) >= GB:
+ return f"{nbytes * 1.0 / GB:.2f} Gb"
+ elif abs(nbytes) >= MB:
+ return f"{nbytes * 1.0 / MB:.2f} Mb"
+ elif abs(nbytes) >= KB:
+ return f"{nbytes * 1.0 / KB:.2f} Kb"
+ return str(nbytes) + " bytes"
+
+
+def download_url_to_file(url: str,
+ dst: Optional[str] = None,
+ prefix: Optional[Path] = None,
+ sha256: Optional[str] = None) -> Path:
+ dst = dst if dst is not None else Path(url).name
+ dst = dst if prefix is None else str(prefix / dst)
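+    # If the destination file already exists, reuse it as-is; note that its
+    # checksum is not re-verified in this case.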
+ if Path(dst).exists():
+ print(f"Skip downloading {url} as {dst} already exists")
+ return Path(dst)
+ file_size = None
+ u = urlopen(Request(url, headers={"User-Agent": "tutorials.downloader"}))
+ meta = u.info()
+ if hasattr(meta, 'getheaders'):
+ content_length = meta.getheaders("Content-Length")
+ else:
+ content_length = meta.get_all("Content-Length")
+ if content_length is not None and len(content_length) > 0:
+ file_size = int(content_length[0])
+ sha256_sum = hashlib.sha256()
+ with open(dst, "wb") as f:
+ while True:
+ buffer = u.read(32768)
+ if len(buffer) == 0:
+ break
+ sha256_sum.update(buffer)
+ f.write(buffer)
+ digest = sha256_sum.hexdigest()
+ if sha256 is not None and sha256 != digest:
+ Path(dst).unlink()
+ raise RuntimeError(f"Downloaded {url} has unexpected sha256sum {digest} should be {sha256}")
+ print(f"Downloaded {url} sha256sum={digest} size={size_fmt(file_size)}")
+ return Path(dst)
+
+
+def unzip(archive: Path, tgt_dir: Path) -> None:
+ with ZipFile(str(archive), "r") as zip_ref:
+ zip_ref.extractall(str(tgt_dir))
+
+
+def download_hymenoptera_data():
+ # transfer learning tutorial data
+ z = download_url_to_file("https://download.pytorch.org/tutorial/hymenoptera_data.zip",
+ prefix=DATA_DIR,
+ sha256="fbc41b31d544714d18dd1230b1e2b455e1557766e13e67f9f5a7a23af7c02209",
+ )
+ unzip(z, BEGINNER_DATA_DIR)
+
+
+def download_nlp_data() -> None:
+ # nlp tutorial data
+ z = download_url_to_file("https://download.pytorch.org/tutorial/data.zip",
+ prefix=DATA_DIR,
+ sha256="fb317e80248faeb62dc25ef3390ae24ca34b94e276bbc5141fd8862c2200bff5",
+ )
+ # This will unzip all files in data.zip to intermediate_source/data/ folder
+ unzip(z, INTERMEDIATE_DATA_DIR.parent)
+
+
+def download_dcgan_data() -> None:
+ # Download dataset for beginner_source/dcgan_faces_tutorial.py
+ z = download_url_to_file("https://s3.amazonaws.com/pytorch-tutorial-assets/img_align_celeba.zip",
+ prefix=DATA_DIR,
+ sha256="46fb89443c578308acf364d7d379fe1b9efb793042c0af734b6112e4fd3a8c74",
+ )
+ unzip(z, BEGINNER_DATA_DIR / "celeba")
+
+
+def download_lenet_mnist() -> None:
+ # Download model for beginner_source/fgsm_tutorial.py
+ download_url_to_file("https://docs.google.com/uc?export=download&id=1HJV2nUHJqclXQ8flKvcWmjZ-OU5DGatl",
+ prefix=BEGINNER_DATA_DIR,
+ dst="lenet_mnist_model.pth",
+ sha256="cb5f8e578aef96d5c1a2cc5695e1aa9bbf4d0fe00d25760eeebaaac6ebc2edcb",
+ )
+
+def download_gpu_quantization_torchao() -> None:
+ # Download SAM model checkpoint unstable_source/gpu_quantization_torchao_tutorial.py
+ download_url_to_file("https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
+ prefix=PROTOTYPE_DATA_DIR,
+ dst="sam_vit_h_4b8939.pth",
+ sha256="a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e",
+ )
+
+def main() -> None:
+ DATA_DIR.mkdir(exist_ok=True)
+ BEGINNER_DATA_DIR.mkdir(exist_ok=True)
+ ADVANCED_DATA_DIR.mkdir(exist_ok=True)
+ INTERMEDIATE_DATA_DIR.mkdir(exist_ok=True)
+ PROTOTYPE_DATA_DIR.mkdir(exist_ok=True)
+
+ if FILES_TO_RUN is None or "transfer_learning_tutorial" in FILES_TO_RUN:
+ download_hymenoptera_data()
+ nlp_tutorials = ["seq2seq_translation_tutorial", "char_rnn_classification_tutorial", "char_rnn_generation_tutorial"]
+ if FILES_TO_RUN is None or any(x in FILES_TO_RUN for x in nlp_tutorials):
+ download_nlp_data()
+ if FILES_TO_RUN is None or "dcgan_faces_tutorial" in FILES_TO_RUN:
+ download_dcgan_data()
+ if FILES_TO_RUN is None or "fgsm_tutorial" in FILES_TO_RUN:
+ download_lenet_mnist()
+ if FILES_TO_RUN is None or "gpu_quantization_torchao_tutorial" in FILES_TO_RUN:
+ download_gpu_quantization_torchao()
+
+if __name__ == "__main__":
+ main()
diff --git a/.jenkins/get_docker_tag.py b/.jenkins/get_docker_tag.py
new file mode 100644
index 00000000000..21c4a8f7089
--- /dev/null
+++ b/.jenkins/get_docker_tag.py
@@ -0,0 +1,18 @@
+import requests
+
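+# Print the git tree SHA of the .ci/docker folder in pytorch/pytorch, which is
+# presumably what CI uses as the tag of the prebuilt Docker image to pull
+# (hence the script name).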
+REQUEST_HEADERS = {
+ "Accept": "application/vnd.github.v3+json",
+}
+
+if __name__ == "__main__":
+ url = "https://api.github.com/repos/pytorch/pytorch/contents/.ci"
+
+ response = requests.get(url, headers=REQUEST_HEADERS)
+ docker_sha = None
+ for finfo in response.json():
+ if finfo["name"] == "docker":
+ docker_sha = finfo["sha"]
+ break
+ if docker_sha is None:
+ raise RuntimeError("Can't find sha sum of docker folder")
+ print(docker_sha)
diff --git a/.jenkins/get_files_to_run.py b/.jenkins/get_files_to_run.py
new file mode 100644
index 00000000000..bdf4562a827
--- /dev/null
+++ b/.jenkins/get_files_to_run.py
@@ -0,0 +1,106 @@
+from typing import Any, Dict, List, Optional, Tuple
+import json
+import os
+from pathlib import Path
+from remove_runnable_code import remove_runnable_code
+
+
+# Calculate repo base dir
+REPO_BASE_DIR = Path(__file__).absolute().parent.parent
+
+
+def get_all_files() -> List[str]:
+ sources = [x.relative_to(REPO_BASE_DIR) for x in REPO_BASE_DIR.glob("*_source/**/*.py") if 'data' not in x.parts]
+ return sorted([str(x) for x in sources])
+
+
+def read_metadata() -> Dict[str, Any]:
+ with (REPO_BASE_DIR / ".jenkins" / "metadata.json").open() as fp:
+ return json.load(fp)
+
+
+def calculate_shards(all_files: List[str], num_shards: int = 20) -> List[List[str]]:
+ sharded_files: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)]
+ metadata = read_metadata()
+
+ def get_duration(file: str) -> int:
+ # tutorials not listed in the metadata.json file usually take
+ # <3min to run, so we'll default to 1min if it's not listed
+ return metadata.get(file, {}).get("duration", 60)
+
+ def get_needs_machine(file: str) -> Optional[str]:
+ return metadata.get(file, {}).get("needs", None)
+
+ def add_to_shard(i, filename):
+ shard_time, shard_jobs = sharded_files[i]
+ shard_jobs.append(filename)
+ sharded_files[i] = (
+ shard_time + get_duration(filename),
+ shard_jobs,
+ )
+
+ all_other_files = all_files.copy()
+ needs_multigpu = list(
+ filter(lambda x: get_needs_machine(x) == "linux.16xlarge.nvidia.gpu", all_files,)
+ )
+ needs_a10g = list(
+ filter(lambda x: get_needs_machine(x) == "linux.g5.4xlarge.nvidia.gpu", all_files,)
+ )
+ for filename in needs_multigpu:
+ # currently, the only job that has multigpu is the 0th worker,
+ # so we'll add all the jobs that need this machine to the 0th worker
+ add_to_shard(0, filename)
+ all_other_files.remove(filename)
+ for filename in needs_a10g:
+ # currently, workers 1-5 use linux.g5.4xlarge.nvidia.gpu (sm86, A10G),
+ # so we'll add all the jobs that need this machine to the 1st worker
+ add_to_shard(1, filename)
+ all_other_files.remove(filename)
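+    # Greedily assign the remaining tutorials, longest estimated duration first,
+    # to whichever of shards 1..num_shards-1 currently has the smallest total
+    # duration (shard 0 is reserved for the multi-GPU jobs added above).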
+ sorted_files = sorted(all_other_files, key=get_duration, reverse=True,)
+
+ for filename in sorted_files:
+ min_shard_index = sorted(range(1, num_shards), key=lambda i: sharded_files[i][0])[
+ 0
+ ]
+ add_to_shard(min_shard_index, filename)
+ return [x[1] for x in sharded_files]
+
+
+def compute_files_to_keep(files_to_run: List[str]) -> List[str]:
+ metadata = read_metadata()
+ files_to_keep = list(files_to_run)
+ for file in files_to_run:
+ extra_files = metadata.get(file, {}).get("extra_files", [])
+ files_to_keep.extend(extra_files)
+ return files_to_keep
+
+
+def remove_other_files(all_files, files_to_keep) -> None:
+
+ for file in all_files:
+ if file not in files_to_keep:
+ remove_runnable_code(file, file)
+
+
+def parse_args() -> Any:
+ from argparse import ArgumentParser
+ parser = ArgumentParser("Select files to run")
+ parser.add_argument("--dry-run", action="store_true")
+ parser.add_argument("--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", "20")))
+ parser.add_argument("--shard-num", type=int, default=int(os.environ.get("WORKER_ID", "1")))
+ return parser.parse_args()
+
+
+def main() -> None:
+ args = parse_args()
+
+ all_files = get_all_files()
+ files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[args.shard_num - 1]
+ if not args.dry_run:
+ remove_other_files(all_files, compute_files_to_keep(files_to_run))
+ stripped_file_names = [Path(x).stem for x in files_to_run]
+ print(" ".join(stripped_file_names))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.jenkins/get_sphinx_filenames.py b/.jenkins/get_sphinx_filenames.py
new file mode 100644
index 00000000000..b84267b48a3
--- /dev/null
+++ b/.jenkins/get_sphinx_filenames.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+from typing import List
+
+from get_files_to_run import get_all_files
+from validate_tutorials_built import NOT_RUN
+
+
+def get_files_for_sphinx() -> List[str]:
+ all_py_files = get_all_files()
+ return [x for x in all_py_files if all(y not in x for y in NOT_RUN)]
+
+
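+# The file names are joined with "|" so the result can be used as a single
+# regular expression (for example, as a sphinx-gallery filename pattern)
+# matching every tutorial that should be executed.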
+SPHINX_SHOULD_RUN = "|".join(get_files_for_sphinx())
diff --git a/.jenkins/insert_last_verified.py b/.jenkins/insert_last_verified.py
new file mode 100644
index 00000000000..b43ef8de8e8
--- /dev/null
+++ b/.jenkins/insert_last_verified.py
@@ -0,0 +1,160 @@
+import json
+import os
+import subprocess
+import sys
+from datetime import datetime
+
+from bs4 import BeautifulSoup
+
+
+json_file_path = "tutorials-review-data.json"
+
+# paths to skip from the post-processing script
+paths_to_skip = [
+ "beginner/examples_autograd/two_layer_net_custom_function", # not present in the repo
+ "beginner/examples_nn/two_layer_net_module", # not present in the repo
+ "beginner/examples_tensor/two_layer_net_numpy", # not present in the repo
+ "beginner/examples_tensor/two_layer_net_tensor", # not present in the repo
+ "beginner/examples_autograd/two_layer_net_autograd", # not present in the repo
+ "beginner/examples_nn/two_layer_net_optim", # not present in the repo
+ "beginner/examples_nn/two_layer_net_nn", # not present in the repo
+ "intermediate/coding_ddpg", # not present in the repo - will delete the carryover
+]
+# Mapping of build directories to their corresponding source directories
+source_to_build_mapping = {
+ "beginner": "beginner_source",
+ "recipes": "recipes_source",
+ "distributed": "distributed",
+ "intermediate": "intermediate_source",
+ "prototype": "prototype_source",
+ "advanced": "advanced_source",
+ "": "", # root dir for index.rst
+}
+
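+# Return the author date of a commit touching file_path, selected via the extra
+# "git log" arguments (e.g. the first commit for the creation date, or the most
+# recent commit for the last-updated date).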
+def get_git_log_date(file_path, git_log_args):
+ try:
+ result = subprocess.run(
+ ["git", "log"] + git_log_args + ["--", file_path],
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ if result.stdout:
+ date_str = result.stdout.splitlines()[0]
+ return datetime.strptime(date_str, "%a, %d %b %Y %H:%M:%S %z")
+ except subprocess.CalledProcessError:
+ pass
+ raise ValueError(f"Could not find date for {file_path}")
+
+def get_creation_date(file_path):
+ return get_git_log_date(file_path, ["--diff-filter=A", "--format=%aD"]).strftime("%b %d, %Y")
+
+
+def get_last_updated_date(file_path):
+ return get_git_log_date(file_path, ["-1", "--format=%aD"]).strftime("%b %d, %Y")
+
+# Try to find the source file with the given base path and the extensions .rst and .py
+def find_source_file(base_path):
+ for ext in [".rst", ".py"]:
+ source_file_path = base_path + ext
+ if os.path.exists(source_file_path):
+ return source_file_path
+ return None
+
+
+# Function to process a JSON file and insert the "Last Verified" information into the HTML files
+def process_json_file(build_dir , json_file_path):
+ with open(json_file_path, "r", encoding="utf-8") as json_file:
+ json_data = json.load(json_file)
+
+ for entry in json_data:
+ path = entry["Path"]
+ last_verified = entry["Last Verified"]
+ status = entry.get("Status", "")
+ if path in paths_to_skip:
+ print(f"Skipping path: {path}")
+ continue
+ if status in ["needs update", "not verified"]:
+ formatted_last_verified = "Not Verified"
+ elif last_verified:
+ try:
+ last_verified_date = datetime.strptime(last_verified, "%Y-%m-%d")
+ formatted_last_verified = last_verified_date.strftime("%b %d, %Y")
+ except ValueError:
+ formatted_last_verified = "Unknown"
+ else:
+ formatted_last_verified = "Not Verified"
+ if status == "deprecated":
+ formatted_last_verified += "Deprecated"
+
+ for build_subdir, source_subdir in source_to_build_mapping.items():
+ if path.startswith(build_subdir):
+ html_file_path = os.path.join(build_dir, path + ".html")
+ base_source_path = os.path.join(
+ source_subdir, path[len(build_subdir) + 1 :]
+ )
+ source_file_path = find_source_file(base_source_path)
+ break
+ else:
+ print(f"Warning: No mapping found for path {path}")
+ continue
+
+ if not os.path.exists(html_file_path):
+ print(
+                f"Warning: HTML file not found for path {html_file_path}. "
+                "If this is a new tutorial, please add it to the audit JSON file and set the Verified status and today's date."
+ )
+ continue
+
+ if not source_file_path:
+ print(f"Warning: Source file not found for path {base_source_path}.")
+ continue
+
+ created_on = get_creation_date(source_file_path)
+ last_updated = get_last_updated_date(source_file_path)
+
+ with open(html_file_path, "r", encoding="utf-8") as file:
+ soup = BeautifulSoup(file, "html.parser")
+ # Check if the tag with class "date-info-last-verified" already exists
+ existing_date_info = soup.find("p", {"class": "date-info-last-verified"})
+ if existing_date_info:
+ print(
+                    f"Warning: <p> tag with class 'date-info-last-verified' already exists in {html_file_path}"
+ )
+ continue
+
+ h1_tag = soup.find("h1") # Find the h1 tag to insert the dates
+ if h1_tag:
+ date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
+ date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
+ # Add the "Created On", "Last Updated", and "Last Verified" information
+ date_info_tag.string = (
+ f"Created On: {created_on} | "
+ f"Last Updated: {last_updated} | "
+ f"Last Verified: {formatted_last_verified}"
+ )
+                # Insert the new tag after the <h1> tag
+ h1_tag.insert_after(date_info_tag)
+ # Save back to the HTML.
+ with open(html_file_path, "w", encoding="utf-8") as file:
+ file.write(str(soup))
+ else:
+                print(f"Warning: <h1> tag not found in {html_file_path}")
+
+
+def main():
+ if len(sys.argv) < 2:
+ print("Error: Build directory not provided. Exiting.")
+ exit(1)
+ build_dir = sys.argv[1]
+ print(f"Build directory: {build_dir}")
+ process_json_file(build_dir , json_file_path)
+ print(
+ "Finished processing JSON file. Please check the output for any warnings. "
+ "Pages like `nlp/index.html` are generated only during the full `make docs` "
+ "or `make html` build. Warnings about these files when you run `make html-noplot` "
+ "can be ignored."
+ )
+
+if __name__ == "__main__":
+ main()
diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json
new file mode 100644
index 00000000000..6e82d054b4e
--- /dev/null
+++ b/.jenkins/metadata.json
@@ -0,0 +1,76 @@
+{
+ "intermediate_source/ax_multiobjective_nas_tutorial.py": {
+ "extra_files": ["intermediate_source/mnist_train_nas.py"],
+ "duration": 2000
+ },
+ "beginner_source/dcgan_faces_tutorial.py": {
+ "duration": 2000
+ },
+ "intermediate_source/seq2seq_translation_tutorial.py": {
+ "duration": 1200
+ },
+ "beginner_source/hyperparameter_tuning_tutorial.py": {
+ "duration": 0
+ },
+ "advanced_source/dynamic_quantization_tutorial.py": {
+ "duration": 380
+ },
+ "beginner_source/chatbot_tutorial.py": {
+ "duration": 330
+ },
+ "intermediate_source/pipeline_tutorial.py": {
+ "duration": 320,
+ "needs": "linux.16xlarge.nvidia.gpu"
+ },
+ "beginner_source/blitz/data_parallel_tutorial.py": {
+ "needs": "linux.16xlarge.nvidia.gpu"
+ },
+ "intermediate_source/model_parallel_tutorial.py": {
+ "needs": "linux.16xlarge.nvidia.gpu"
+ },
+ "intermediate_source/torchrec_intro_tutorial.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "recipes_source/torch_export_aoti_python.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "advanced_source/pendulum.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu",
+        "_comment": "needs to be here for compiling_optimizer_lr_scheduler.py to run."
+ },
+ "intermediate_source/torchvision_tutorial.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu",
+        "_comment": "does not require an A10G but needs to run before gpu_quantization_torchao_tutorial.py."
+ },
+ "advanced_source/coding_ddpg.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu",
+        "_comment": "does not require an A10G but needs to run before gpu_quantization_torchao_tutorial.py."
+ },
+ "recipes_source/compiling_optimizer_lr_scheduler.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "intermediate_source/torch_compile_tutorial.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "intermediate_source/torch_export_tutorial.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "intermediate_source/scaled_dot_product_attention_tutorial.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "intermediate_source/transformer_building_blocks.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "recipes_source/torch_compile_user_defined_triton_kernel_tutorial.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "recipes_source/regional_compilation.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "advanced_source/semi_structured_sparse.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ },
+ "prototype_source/gpu_quantization_torchao_tutorial.py": {
+ "needs": "linux.g5.4xlarge.nvidia.gpu"
+ }
+}
diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py
new file mode 100644
index 00000000000..d10eb5a1bcc
--- /dev/null
+++ b/.jenkins/post_process_notebooks.py
@@ -0,0 +1,97 @@
+import nbformat as nbf
+import os
+import re
+
+"""
+This post-processing script needs to run after the .ipynb files are
+generated. The script removes extraneous ```{=html} syntax from the
+admonitions and splits the cells that have a video iframe into a
+separate code cell that can be run to load the video directly
+in the notebook. This script is included in build.sh.
+"""
+
+
+# Pattern to search ``` {.python .jupyter-code-cell}
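+# Group 1 captures the markdown before the block, group 2 the embedded
+# IPython.display code, and group 3 everything after the block.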
+pattern = re.compile(r'(.*?)``` {\.python \.jupyter-code-cell}\n(.*?from IPython\.display import display, HTML.*?display\(HTML\(html_code\)\))\n```(.*)', re.DOTALL)
+
+
+def process_video_cell(notebook_path):
+ """
+    This function finds the "``` {.python .jupyter-code-cell}" code
+    blocks and splits them into a separate code cell (instead of
+    markdown), which allows the video to be loaded directly in the
+    notebook. The rest of the content is placed in a new markdown cell.
+ """
+ print(f'Processing file: {notebook_path}')
+ notebook = nbf.read(notebook_path, as_version=4)
+
+ # Iterate over markdown cells
+ for i, cell in enumerate(notebook.cells):
+ if cell.cell_type == 'markdown':
+ match = pattern.search(cell.source)
+ if match:
+ print(f'Match found in cell {i}: {match.group(0)[:100]}...')
+ # Extract the parts before and after the video code block
+ before_html_block = match.group(1)
+ code_block = match.group(2)
+
+ # Add a comment to run the cell to display the video
+ code_block = "# Run this cell to load the video\n" + code_block
+ # Create a new code cell
+ new_code_cell = nbf.v4.new_code_cell(source=code_block)
+
+ # Replace the original markdown cell with the part before the code block
+ cell.source = before_html_block
+
+ # Insert the new code cell after the current one
+ notebook.cells.insert(i+1, new_code_cell)
+ print(f'New code cell created with source: {new_code_cell.source}')
+
+ # If there is content after the HTML code block, create a new markdown cell
+ if len(match.group(3).strip()) > 0:
+ after_html_block = match.group(3)
+ new_markdown_cell = nbf.v4.new_markdown_cell(source=after_html_block)
+ # Create a new markdown cell and add the content after code block there
+ notebook.cells.insert(i+2, new_markdown_cell)
+
+ else:
+ # Remove ```{=html} from the code block
+ cell.source = remove_html_tag(cell.source)
+
+ nbf.write(notebook, notebook_path)
+
+
+def remove_html_tag(content):
+ """
+    Pandoc adds an extraneous ```{=html} ``` wrapper to raw HTML blocks,
+    which prevents them from rendering correctly. This function removes
+    the ```{=html} wrappers that we don't need.
+ """
+ content = re.sub(r'```{=html}\n\n```', '">', content)
+ content = re.sub(r'<\/div>\n```', '\n', content)
+    content = re.sub(r'```{=html}\n</div>\n```', '\n', content)
+ content = re.sub(r'```{=html}', '', content)
+ content = re.sub(r'\n```', '', content)
+ return content
+
+
+def walk_dir(downloads_dir):
+ """
+ Walk the dir and process all notebook files in
+ the _downloads directory and its subdirectories.
+ """
+ for root, dirs, files in os.walk(downloads_dir):
+ for filename in files:
+ if filename.endswith('.ipynb'):
+ process_video_cell(os.path.join(root, filename))
+
+
+def main():
+ downloads_dir = './docs/_downloads'
+ walk_dir(downloads_dir)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.jenkins/remove_invisible_code_block_batch.sh b/.jenkins/remove_invisible_code_block_batch.sh
new file mode 100644
index 00000000000..5de9698de0f
--- /dev/null
+++ b/.jenkins/remove_invisible_code_block_batch.sh
@@ -0,0 +1,21 @@
+BUILDDIR=$1
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
+
+# Remove INVISIBLE_CODE_BLOCK from .html/.rst/.rst.txt/.ipynb/.py files
+for filename in $(find $BUILDDIR/beginner $BUILDDIR/intermediate $BUILDDIR/advanced -name '*.html'); do
+ echo "Removing INVISIBLE_CODE_BLOCK from " $filename
+ python $DIR/remove_invisible_code_block_from_html.py $filename $filename
+done
+for filename in $(find $BUILDDIR/_sources/beginner $BUILDDIR/_sources/intermediate $BUILDDIR/_sources/advanced -name '*.rst.txt'); do
+ echo "Removing INVISIBLE_CODE_BLOCK from " $filename
+ python $DIR/remove_invisible_code_block_from_rst_txt.py $filename $filename
+done
+for filename in $(find $BUILDDIR/_downloads -name '*.ipynb'); do
+ echo "Removing INVISIBLE_CODE_BLOCK from " $filename
+ python $DIR/remove_invisible_code_block_from_ipynb.py $filename $filename
+done
+for filename in $(find $BUILDDIR/_downloads -name '*.py'); do
+ echo "Removing INVISIBLE_CODE_BLOCK from " $filename
+ python $DIR/remove_invisible_code_block_from_py.py $filename $filename
+done
diff --git a/.jenkins/remove_invisible_code_block_from_html.py b/.jenkins/remove_invisible_code_block_from_html.py
new file mode 100644
index 00000000000..827b9802d91
--- /dev/null
+++ b/.jenkins/remove_invisible_code_block_from_html.py
@@ -0,0 +1,17 @@
+import sys
+from bs4 import BeautifulSoup
+
+html_file_path = sys.argv[1]
+output_file_path = sys.argv[2]
+
+with open(html_file_path, 'r', encoding='utf-8') as html_file:
+ html = html_file.read()
+html_soup = BeautifulSoup(html, 'html.parser')
+
+elems = html_soup.find_all("div", {"class": "highlight-default"})
+for elem in elems:
+ if "%%%%%%INVISIBLE_CODE_BLOCK%%%%%%" in str(elem):
+ elem.decompose()
+
+with open(output_file_path, "w", encoding='utf-8') as output_file:
+ output_file.write(str(html_soup))
diff --git a/.jenkins/remove_invisible_code_block_from_ipynb.py b/.jenkins/remove_invisible_code_block_from_ipynb.py
new file mode 100644
index 00000000000..69913efb050
--- /dev/null
+++ b/.jenkins/remove_invisible_code_block_from_ipynb.py
@@ -0,0 +1,18 @@
+import sys
+from bs4 import BeautifulSoup
+
+ipynb_file_path = sys.argv[1]
+output_file_path = sys.argv[2]
+
+with open(ipynb_file_path, 'r', encoding='utf-8') as ipynb_file:
+ ipynb_lines = ipynb_file.readlines()
+
+ipynb_out_lines = []
+
+for line in ipynb_lines:
+ if not '%%%%%%INVISIBLE_CODE_BLOCK%%%%%%' in line:
+ ipynb_out_lines.append(line)
+
+with open(output_file_path, "w", encoding='utf-8') as output_file:
+ for line in ipynb_out_lines:
+ output_file.write(line)
diff --git a/.jenkins/remove_invisible_code_block_from_py.py b/.jenkins/remove_invisible_code_block_from_py.py
new file mode 100644
index 00000000000..d39e5f4bf98
--- /dev/null
+++ b/.jenkins/remove_invisible_code_block_from_py.py
@@ -0,0 +1,25 @@
+import sys
+from bs4 import BeautifulSoup
+
+py_file_path = sys.argv[1]
+output_file_path = sys.argv[2]
+
+with open(py_file_path, 'r', encoding='utf-8') as py_file:
+ py_lines = py_file.readlines()
+
+py_out_lines = []
+
+in_invisible_block = False
+for line in py_lines:
+ if not in_invisible_block:
+ if '%%%%%%INVISIBLE_CODE_BLOCK%%%%%%' in line:
+ in_invisible_block = True
+ else:
+ py_out_lines.append(line)
+ else:
+ if '%%%%%%INVISIBLE_CODE_BLOCK%%%%%%' in line:
+ in_invisible_block = False
+
+with open(output_file_path, "w", encoding='utf-8') as output_file:
+ for line in py_out_lines:
+ output_file.write(line)
diff --git a/.jenkins/remove_invisible_code_block_from_rst_txt.py b/.jenkins/remove_invisible_code_block_from_rst_txt.py
new file mode 100644
index 00000000000..e6eb648e754
--- /dev/null
+++ b/.jenkins/remove_invisible_code_block_from_rst_txt.py
@@ -0,0 +1,19 @@
+import sys
+from bs4 import BeautifulSoup
+
+rst_txt_file_path = sys.argv[1]
+output_file_path = sys.argv[2]
+
+with open(rst_txt_file_path, 'r', encoding='utf-8') as rst_txt_file:
+ rst_txt = rst_txt_file.read()
+
+splits = rst_txt.split('.. code-block:: default\n\n\n # %%%%%%INVISIBLE_CODE_BLOCK%%%%%%\n')
+if len(splits) == 2:
+ code_before_invisible_block = splits[0]
+ code_after_invisible_block = splits[1].split(' # %%%%%%INVISIBLE_CODE_BLOCK%%%%%%\n')[1]
+ rst_txt_out = code_before_invisible_block + code_after_invisible_block
+else:
+ rst_txt_out = rst_txt
+
+with open(output_file_path, "w", encoding='utf-8') as output_file:
+ output_file.write(rst_txt_out)
diff --git a/.jenkins/remove_runnable_code.py b/.jenkins/remove_runnable_code.py
new file mode 100644
index 00000000000..037017d8d76
--- /dev/null
+++ b/.jenkins/remove_runnable_code.py
@@ -0,0 +1,58 @@
+import sys
+
+STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE = "STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE"
+STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE = "STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE"
+STATE_NORMAL = "STATE_NORMAL"
+
+
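+# Strip a tutorial down to its comments and docstrings: runnable code lines are
+# replaced with blank lines and a RUNNABLE_CODE_REMOVED marker is appended, so
+# later build steps can detect (and drop) HTML generated from the stripped file.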
+def remove_runnable_code(python_file_path, output_file_path):
+ with open(python_file_path, 'r', encoding='utf-8') as file:
+ lines = file.readlines()
+ ret_lines = []
+ state = STATE_NORMAL
+ for line in lines:
+ if state == STATE_NORMAL:
+ if line.startswith('#'):
+ ret_lines.append(line)
+ state = STATE_NORMAL
+ elif ((line.startswith('"""') or line.startswith('r"""')) and
+ line.endswith('"""')):
+ ret_lines.append(line)
+ state = STATE_NORMAL
+ elif line.startswith('"""') or line.startswith('r"""'):
+ ret_lines.append(line)
+ state = STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE
+ elif ((line.startswith("'''") or line.startswith("r'''")) and
+ line.endswith("'''")):
+ ret_lines.append(line)
+ state = STATE_NORMAL
+ elif line.startswith("'''") or line.startswith("r'''"):
+ ret_lines.append(line)
+ state = STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE
+ else:
+ ret_lines.append("\n")
+ state = STATE_NORMAL
+ elif state == STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE:
+ if line.startswith('"""'):
+ ret_lines.append(line)
+ state = STATE_NORMAL
+ else:
+ ret_lines.append(line)
+ state = STATE_IN_MULTILINE_COMMENT_BLOCK_DOUBLE_QUOTE
+ elif state == STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE:
+ if line.startswith("'''"):
+ ret_lines.append(line)
+ state = STATE_NORMAL
+ else:
+ ret_lines.append(line)
+ state = STATE_IN_MULTILINE_COMMENT_BLOCK_SINGLE_QUOTE
+
+ ret_lines.append("\n# %%%%%%RUNNABLE_CODE_REMOVED%%%%%%")
+
+ with open(output_file_path, 'w', encoding='utf-8') as file:
+ for line in ret_lines:
+ file.write(line)
+
+
+if __name__ == "__main__":
+ remove_runnable_code(sys.argv[1], sys.argv[2])
diff --git a/.jenkins/replace_tutorial_html_content.py b/.jenkins/replace_tutorial_html_content.py
new file mode 100644
index 00000000000..587464cd99e
--- /dev/null
+++ b/.jenkins/replace_tutorial_html_content.py
@@ -0,0 +1,24 @@
+import sys
+
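+# Replace the tutorial body (the "sphx-glr-example-title" div) of an HTML page
+# built without plots with the corresponding div from a previously built page
+# that contains the rendered plots, and write the result to the output path.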
+noplot_html_file_path = sys.argv[1]
+hasplot_html_file_path = sys.argv[2]
+output_html_file_path = sys.argv[3]
+
+from bs4 import BeautifulSoup
+with open(noplot_html_file_path, 'r', encoding='utf-8') as noplot_html_file:
+ noplot_html = noplot_html_file.read()
+with open(hasplot_html_file_path, 'r', encoding='utf-8') as hasplot_html_file:
+ hasplot_html = hasplot_html_file.read()
+
+noplot_html_soup = BeautifulSoup(noplot_html, 'html.parser')
+elems = noplot_html_soup.find_all("div", {"class": "sphx-glr-example-title"})
+if len(elems) == 0:
+ print("No match found, not replacing HTML content in "+noplot_html_file_path)
+elif len(elems) == 1:
+ print("Match found in "+noplot_html_file_path+". Replacing its content.")
+ elem = elems[0]
+ elem.replace_with(BeautifulSoup(hasplot_html, 'html.parser').find_all("div", {"class": "sphx-glr-example-title"})[0])
+ with open(output_html_file_path, "w", encoding='utf-8') as output_html_file:
+ output_html_file.write(str(noplot_html_soup))
+else:
+ raise Exception("Found more than one match in "+noplot_html_file_path+". Aborting.")
diff --git a/.jenkins/test_files_to_run.py b/.jenkins/test_files_to_run.py
new file mode 100644
index 00000000000..b4842a7dd75
--- /dev/null
+++ b/.jenkins/test_files_to_run.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+from get_files_to_run import get_all_files, calculate_shards
+from unittest import TestCase, main
+from functools import reduce
+
+class TestSharding(TestCase):
+ def test_no_sharding(self):
+ all_files=get_all_files()
+ sharded_files = calculate_shards(all_files, 1)
+ self.assertSetEqual(set(all_files), set(sharded_files[0]))
+
+ def test_sharding(self, num_shards=20):
+ all_files=get_all_files()
+ sharded_files = map(set, calculate_shards(all_files, num_shards))
+ self.assertSetEqual(set(all_files), reduce(lambda x,y: x.union(y), sharded_files, set()))
+
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
new file mode 100644
index 00000000000..75dd51dd789
--- /dev/null
+++ b/.jenkins/validate_tutorials_built.py
@@ -0,0 +1,84 @@
+from pathlib import Path
+from typing import List
+
+from bs4 import BeautifulSoup
+
+REPO_ROOT = Path(__file__).parent.parent
+
+# For every tutorial on this list, we should determine if it is ok to not run the tutorial (add a comment after
+# the file name to explain why, like intro.html), or fix the tutorial and remove it from this list.
+
+NOT_RUN = [
+ "beginner_source/basics/intro", # no code
+ "beginner_source/introyt/introyt_index", # no code
+ "beginner_source/onnx/intro_onnx",
+ "beginner_source/profiler",
+ "beginner_source/saving_loading_models",
+ "beginner_source/introyt/captumyt",
+ "beginner_source/examples_nn/polynomial_module",
+ "beginner_source/examples_nn/dynamic_net",
+ "beginner_source/examples_nn/polynomial_optim",
+ "beginner_source/examples_autograd/polynomial_autograd",
+ "beginner_source/examples_autograd/polynomial_custom_function",
+ "intermediate_source/dqn_with_rnn_tutorial", #not working on 2.8 release reenable after 3514
+ "intermediate_source/mnist_train_nas", # used by ax_multiobjective_nas_tutorial.py
+ "intermediate_source/torch_compile_conv_bn_fuser",
+ "intermediate_source/_torch_export_nightly_tutorial", # does not work on release
+ "advanced_source/usb_semisup_learn", # fails with CUDA OOM error, should try on a different worker
+ "unstable_source/gpu_direct_storage", # requires specific filesystem + GPUDirect Storage to be set up
+ "recipes_source/recipes/tensorboard_with_pytorch",
+ "recipes_source/recipes/what_is_state_dict",
+ "recipes_source/recipes/profiler_recipe",
+ "recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model",
+ "recipes_source/recipes/benchmark",
+ "recipes_source/recipes/tuning_guide",
+ "recipes_source/recipes/zeroing_out_gradients",
+ "recipes_source/recipes/defining_a_neural_network",
+ "recipes_source/recipes/timer_quick_start",
+ "recipes_source/recipes/amp_recipe",
+ "recipes_source/recipes/Captum_Recipe",
+ "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
+ "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed.
+    "intermediate_source/torchrec_intro_tutorial",  # failing with 2.8, reenable after 3498
+]
+
+def tutorial_source_dirs() -> List[Path]:
+ return [
+ p.relative_to(REPO_ROOT).with_name(p.stem[:-7])
+ for p in REPO_ROOT.glob("*_source")
+ ]
+
+
+def main() -> None:
+ docs_dir = REPO_ROOT / "docs"
+ html_file_paths = []
+ for tutorial_source_dir in tutorial_source_dirs():
+ glob_path = f"{tutorial_source_dir}/**/*.html"
+ html_file_paths += docs_dir.glob(glob_path)
+
+ should_not_run = [f'{x.replace("_source", "")}.html' for x in NOT_RUN]
+ did_not_run = []
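+    # Sphinx-Gallery reports "0 minutes 0.000 seconds" for tutorials whose
+    # Python code did not actually execute; flag any such page that is not on
+    # the known-bad (NOT_RUN) list.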
+ for html_file_path in html_file_paths:
+ with open(html_file_path, "r", encoding="utf-8") as html_file:
+ html = html_file.read()
+ html_soup = BeautifulSoup(html, "html.parser")
+ elems = html_soup.find_all("p", {"class": "sphx-glr-timing"})
+ for elem in elems:
+ if (
+ "Total running time of the script: ( 0 minutes 0.000 seconds)"
+ in elem.text
+ and not any(html_file_path.match(file) for file in should_not_run)
+ ):
+ did_not_run.append(html_file_path.as_posix())
+
+ if len(did_not_run) != 0:
+ raise RuntimeError(
+            "The following file(s) are not on the known-bad list but ran in 0.000 sec, meaning that any "
+            + "Python code in this tutorial probably didn't run:\n{}".format(
+ "\n".join(did_not_run)
+ )
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.lintrunner.toml b/.lintrunner.toml
new file mode 100644
index 00000000000..d3a1cbd9885
--- /dev/null
+++ b/.lintrunner.toml
@@ -0,0 +1,225 @@
+merge_base_with = "origin/main"
+
+# 4805a6ead6f1e7f32351056e2602be4e908f69b7 is from pytorch/pytorch main branch 2025-07-16
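+# The linter adapters referenced below are downloaded from that pinned commit
+# via tools/linter/adapters/run_from_link.py (see each linter's init_command).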
+
+[[linter]]
+code = 'SPACES'
+include_patterns = ['**']
+exclude_patterns = [
+ "_static/**/*", # Contains some files that should usually not be linted
+ # All files below this should be checked and either removed from the
+ # exclusion list by fixing them or have a reason to be excluded.
+ "advanced_source/coding_ddpg.py",
+ "advanced_source/cpp_autograd.rst",
+ "advanced_source/cpp_custom_ops.rst",
+ "advanced_source/generic_join.rst",
+ "advanced_source/neural_style_tutorial.py",
+ "advanced_source/pendulum.py",
+ "advanced_source/privateuseone.rst",
+ "advanced_source/semi_structured_sparse.py",
+ "advanced_source/sharding.rst",
+ "advanced_source/torch_script_custom_classes/custom_class_project/custom_test.py",
+ "advanced_source/transformer__timeseries_cpp_tutorial/transformer_timeseries.cpp",
+ "advanced_source/usb_semisup_learn.py",
+ "beginner_source/blitz/README.txt",
+ "beginner_source/blitz/neural_networks_tutorial.py",
+ "beginner_source/dcgan_faces_tutorial.py",
+ "beginner_source/ddp_series_fault_tolerance.rst",
+ "beginner_source/ddp_series_theory.rst",
+ "beginner_source/examples_nn/polynomial_module.py",
+ "beginner_source/examples_nn/polynomial_nn.py",
+ "beginner_source/hta_intro_tutorial.rst",
+ "beginner_source/hta_trace_diff_tutorial.rst",
+ "beginner_source/hybrid_frontend/README.txt",
+ "beginner_source/hybrid_frontend_tutorial.rst",
+ "beginner_source/hyperparameter_tuning_tutorial.py",
+ "beginner_source/introyt/README.txt",
+ "beginner_source/introyt/autogradyt_tutorial.py",
+ "beginner_source/introyt/captumyt.py",
+ "beginner_source/introyt/introyt1_tutorial.py",
+ "beginner_source/introyt/modelsyt_tutorial.py",
+ "beginner_source/introyt/tensorboardyt_tutorial.py",
+ "beginner_source/introyt/tensors_deeper_tutorial.py",
+ "beginner_source/introyt/trainingyt.py",
+ "beginner_source/knowledge_distillation_tutorial.py",
+ "beginner_source/nlp/sequence_models_tutorial.py",
+ "beginner_source/onnx/export_control_flow_model_to_onnx_tutorial.py",
+ "beginner_source/onnx/onnx_registry_tutorial.py",
+ "beginner_source/pytorch_with_examples.rst",
+ "beginner_source/saving_loading_models.py",
+ "beginner_source/template_tutorial.py",
+ "beginner_source/transfer_learning_tutorial.py",
+ "intermediate_source/TCPStore_libuv_backend.rst",
+ "intermediate_source/ax_multiobjective_nas_tutorial.py",
+ "intermediate_source/compiled_autograd_tutorial.rst",
+ "intermediate_source/ddp_series_multinode.rst",
+ "intermediate_source/dqn_with_rnn_tutorial.py",
+ "intermediate_source/fx_profiling_tutorial.py",
+ "intermediate_source/inductor_debug_cpu.py",
+ "intermediate_source/jacobians_hessians.py",
+ "intermediate_source/optimizer_step_in_backward_tutorial.py",
+ "intermediate_source/per_sample_grads.py",
+ "intermediate_source/pruning_tutorial.py",
+ "intermediate_source/reinforcement_q_learning.py",
+ "intermediate_source/tensorboard_profiler_tutorial.py",
+ "intermediate_source/torch_compile_tutorial.py",
+ "intermediate_source/transformer_building_blocks.py",
+ "unstable_source/README.md",
+ "unstable_source/README.txt",
+ "unstable_source/gpu_direct_storage.py",
+ "unstable_source/inductor_cpp_wrapper_tutorial.rst",
+ "unstable_source/inductor_windows.rst",
+ "unstable_source/maskedtensor_advanced_semantics.py",
+ "unstable_source/max_autotune_on_CPU_tutorial.rst",
+ "unstable_source/vmap_recipe.py",
+ "recipes_source/README.txt",
+ "recipes_source/compiling_optimizer.rst",
+ "recipes_source/compiling_optimizer_lr_scheduler.py",
+ "recipes_source/distributed_optim_torchscript.rst",
+ "recipes_source/foreach_map.py",
+ "recipes_source/profile_with_itt.rst",
+ "recipes_source/recipes/Captum_Recipe.py",
+ "recipes_source/recipes/benchmark.py",
+ "recipes_source/recipes/changing_default_device.py",
+ "recipes_source/recipes/defining_a_neural_network.py",
+ "recipes_source/recipes/tensorboard_with_pytorch.py",
+ "recipes_source/recipes/timer_quick_start.py",
+ "recipes_source/recipes/tuning_guide.py",
+ "recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py",
+ "recipes_source/recipes/what_is_state_dict.py",
+ "recipes_source/torch_compile_caching_tutorial.rst",
+ "recipes_source/torch_compile_torch_function_modes.py",
+ "recipes_source/torch_compile_user_defined_triton_kernel_tutorial.py",
+ "recipes_source/torch_compiler_set_stance_tutorial.py",
+ "recipes_source/torch_export_aoti_python.py",
+ "recipes_source/xeon_run_cpu.rst",
+ "advanced_source/cpp_export.rst",
+ "advanced_source/torch-script-parallelism.rst",
+ "advanced_source/torch_script_custom_classes.rst",
+ "advanced_source/torch_script_custom_ops.rst",
+ "recipes_source/torchscript_inference.rst",
+]
+init_command = [
+ 'python3',
+ 'tools/linter/adapters/run_from_link.py',
+ '--lint-name=grep_linter.py',
+ '--lint-link=https://raw.githubusercontent.com/pytorch/pytorch/4805a6ead6f1e7f32351056e2602be4e908f69b7/tools/linter/adapters/grep_linter.py',
+ '--',
+ '--dry-run={{DRYRUN}}',
+]
+command = [
+ 'python3',
+ 'tools/linter/adapters/run_from_link.py',
+ '--run-lint',
+ '--lint-name=grep_linter.py',
+ '--',
+ '--pattern=[[:blank:]]$',
+ '--linter-name=SPACES',
+ '--error-name=trailing spaces',
+ '--replace-pattern=s/[[:blank:]]+$//',
+ """--error-description=\
+ This line has trailing spaces; please remove them.\
+ """,
+ '--',
+ '@{{PATHSFILE}}'
+]
+
+[[linter]]
+code = 'TABS'
+include_patterns = ['**']
+exclude_patterns = [
+ "_static/**/*", # Contains some files that should usually not be linted
+ ".lintrunner.toml", # Ironically needs to contain the tab character to find in other files
+    "Makefile", # Wants tabs for indentation
+ # All files below this should be checked and either removed from the
+ # exclusion list by fixing them or have a reason to be excluded.
+ "advanced_source/README.txt",
+ "advanced_source/cpp_frontend.rst",
+ "advanced_source/torch_script_custom_ops.rst",
+ "beginner_source/README.txt",
+ "beginner_source/basics/tensorqs_tutorial.py",
+ "beginner_source/blitz/README.txt",
+ "beginner_source/blitz/tensor_tutorial.py",
+ "beginner_source/hybrid_frontend/README.txt",
+ "beginner_source/nlp/README.txt",
+ "beginner_source/nlp/pytorch_tutorial.py",
+ "intermediate_source/README.txt",
+ "intermediate_source/TP_tutorial.rst",
+ "intermediate_source/inductor_debug_cpu.py",
+ "unstable_source/README.txt",
+ "recipes_source/README.txt",
+ "recipes_source/recipes/README.txt",
+ "recipes_source/xeon_run_cpu.rst",
+]
+init_command = [
+ 'python3',
+ 'tools/linter/adapters/run_from_link.py',
+ '--lint-name=grep_linter.py',
+ '--lint-link=https://raw.githubusercontent.com/pytorch/pytorch/4805a6ead6f1e7f32351056e2602be4e908f69b7/tools/linter/adapters/grep_linter.py',
+ '--',
+ '--dry-run={{DRYRUN}}',
+]
+command = [
+ 'python3',
+ 'tools/linter/adapters/run_from_link.py',
+ '--run-lint',
+ '--lint-name=grep_linter.py',
+ '--',
+ # @lint-ignore TXT2
+ '--pattern= ',
+ '--linter-name=TABS',
+ '--error-name=saw some tabs',
+ '--replace-pattern=s/\t/ /',
+ """--error-description=\
+ This line has tabs; please replace them with spaces.\
+ """,
+ '--',
+ '@{{PATHSFILE}}'
+]
+
+[[linter]]
+code = 'NEWLINE'
+include_patterns=['**']
+exclude_patterns=[
+ "_static/**/*", # Contains some files that should usually not be linted
+ # All files below this should be checked and either removed from the
+ # exclusion list by fixing them or have a reason to be excluded.
+ "advanced_source/extend_dispatcher.rst",
+ "advanced_source/neural_style_tutorial.py",
+ "advanced_source/sharding.rst",
+ "advanced_source/torch_script_custom_classes/custom_class_project/custom_test.py",
+ "advanced_source/transformer__timeseries_cpp_tutorial/transformer_timeseries.cpp",
+ "beginner_source/blitz/README.txt",
+ "beginner_source/dcgan_faces_tutorial.py",
+ "beginner_source/hta_trace_diff_tutorial.rst",
+ "beginner_source/hybrid_frontend/README.txt",
+ "beginner_source/nlp/pytorch_tutorial.py",
+ "beginner_source/template_tutorial.py",
+ "beginner_source/transfer_learning_tutorial.py",
+ "intermediate_source/custom_function_conv_bn_tutorial.py",
+ "intermediate_source/custom_function_double_backward_tutorial.rst",
+ "intermediate_source/forced_alignment_with_torchaudio_tutorial.rst",
+ "intermediate_source/nlp_from_scratch_index.rst",
+ "intermediate_source/pipeline_tutorial.rst",
+ "recipes_source/README.txt",
+ "recipes_source/script_optimized.rst",
+ "recipes_source/torch_compile_caching_configuration_tutorial.rst",
+ "recipes_source/torch_compile_caching_tutorial.rst",
+]
+init_command = [
+ 'python3',
+ 'tools/linter/adapters/run_from_link.py',
+ '--lint-name=newlines_linter.py',
+ '--lint-link=https://raw.githubusercontent.com/pytorch/pytorch/4805a6ead6f1e7f32351056e2602be4e908f69b7/tools/linter/adapters/newlines_linter.py',
+ '--',
+ '--dry-run={{DRYRUN}}',
+]
+command = [
+ 'python3',
+ 'tools/linter/adapters/run_from_link.py',
+ '--run-lint',
+ '--lint-name=newlines_linter.py',
+ '--',
+ '@{{PATHSFILE}}',
+]
+is_formatter = true
diff --git a/.lycheeignore b/.lycheeignore
new file mode 100644
index 00000000000..fc1e3f1fa85
--- /dev/null
+++ b/.lycheeignore
@@ -0,0 +1,17 @@
+# Used for links to be ignored during the link check.
+# Add link to file along with comment as to why it should be ignored
+
+#Example link in some of the tutorials that should be ignored
+file:///f:/libtmp/some_file
+
+#Ignore links with "file:///" to catch any other example links
+file:\/\/\/.*
+
+# Ignore colab link in the setting of conf.py
+https://pytorch.org/tutorials/beginner/colab/n
+
+# Ignore local host link from intermediate_source/tensorboard_tutorial.rst
+http://localhost:6006
+
+# Ignore link from advanced_source/cpp_frontend.rst
+https://www.uber.com/blog/deep-neuroevolution/
diff --git a/.pyspelling.yml b/.pyspelling.yml
new file mode 100644
index 00000000000..bce797e6559
--- /dev/null
+++ b/.pyspelling.yml
@@ -0,0 +1,163 @@
+spellchecker: aspell
+matrix:
+- name: python
+ sources:
+ - "**/*.py"
+ dictionary:
+ wordlists:
+ - en-wordlist.txt
+ pipeline:
+ - pyspelling.filters.python:
+ group_comments: true
+ - pyspelling.filters.context:
+ context_visible_first: true
+ delimiters:
+ # Exclude figure rST tags
+ - open: '\.\.\s+(figure|literalinclude|math|image|grid)::'
+ close: '\n'
+ # Exclude roles:
+ - open: ':(?:(class|py:mod|mod|func|meth|obj)):`'
+ content: '[^`]*'
+ close: '`'
+ # Exclude reStructuredText hyperlinks
+ - open: '\s'
+ content: '\w*'
+ close: '_'
+ # Exclude raw directive
+ - open: '\.\. (raw)::.*$\n*'
+ close: '\n'
+ # Exclude Python coding directives
+ - open: '-\*- coding:'
+ close: '\n'
+ # Exclude Authors:
+ - open: 'Author(|s):'
+ close: '\n'
+ # Exclude .rst directives:
+ - open: ':math:`.*`'
+ close: ' '
+ # Ignore multiline content in codeblock
+ - open: '(?s)^::\n\n '
+ close: '^\n'
+ # Ignore reStructuredText block directives
+ - open: '\.\. (code-block|math)::.*$\n*'
+ content: '(?P<first>(^(?P<indent>[ ]+).*$\n))(?P<other>(^([ \t]+.*|[ \t]*)$\n)*)'
+ close: '(^(?![ \t]+.*$))'
+ # Ignore references like "[1] Author: Title"
+ - open: '\[\d\]'
+ close: '\n'
+ - pyspelling.filters.markdown:
+ - pyspelling.filters.html:
+ ignores:
+ - code
+ - pre
+ - pyspelling.filters.url:
+- name: reST
+ sources:
+ - "**/*.rst"
+ dictionary:
+ wordlists:
+ - en-wordlist.txt
+ pipeline:
+ - pyspelling.filters.text:
+ - pyspelling.filters.context:
+ context_visible_first: true
+ delimiters:
+ # Ignore text between inline back ticks
+ - open: '(div style|iframe).*'
+ close: '\n'
+ - open: '(- )?(?P<open>`+)'
+ close: '(?P=open)'
+ - open: ':figure:.*'
+ close: '\n'
+ # Ignore reStructuredText roles
+ - open: ':(?:(class|file|func|math|ref|octicon|meth|obj)):`'
+ content: '[^`]*'
+ close: '`'
+ - open: ':width:'
+ close: '$'
+ # Exclude raw directive
+ - open: '\.\. (raw|grid-item-card|galleryitem|includenodoc)::.*$\n*'
+ close: '\n'
+ # Ignore reStructuredText literals
+ - open: '::$'
+ close: '(?P<literal>(?:((?P<indent>[ ]+).*$)|(\n))+)'
+ # Ignore reStructuredText hyperlinks
+ - open: '\s'
+ content: '\w*'
+ close: '_'
+ # Ignore hyperlink in the DDP tutorials
+ - open: '`.*'
+ close: '`__'
+ # Ignore reStructuredText header ---
+ - open: '^'
+ content: '--*'
+ close: '$'
+ # Ignore reStructuredText header '''
+ - open: '^'
+ content: '''''*'
+ close: '$'
+ # Ignore reStructuredText block directives
+ - open: '\.\. (code-block|math|table)::.*$\n*'
+ content: '(?P<first>(^(?P<indent>[ ]+).*$\n))(?P<other>(^([ \t]+.*|[ \t]*)$\n)*)'
+ close: '(^(?![ \t]+.*$))'
+ - open: '\.\. (raw)::.*$\n*'
+ close: '^\s*$'
+ # Ignore reStructuredText substitution definitions
+ - open: '^\.\. \|[^|]+\|'
+ close: '$'
+ # Ignore reStructuredText substitutions
+ - open: '\|'
+ content: '[^|]*'
+ close: '\|_?'
+ # Ignore reStructuredText toctree
+ - open: '\.\.\s+toctree::'
+ close: '(?P<toctree>(?:((?P<indent>[ ]+).*$)|(\n))+)'
+ # Ignore directives
+ - open: '\.\.\s+(image|include|only)::'
+ close: '$'
+ - pyspelling.filters.url:
+- name: markdown
+ sources:
+ - '**/*.md'
+ dictionary:
+ wordlists:
+ - en-wordlist.txt
+ pipeline:
+ - pyspelling.filters.markdown:
+ markdown_extensions:
+ - markdown.extensions.extra:
+ - markdown.extensions.admonition:
+ - markdown.extensions.codehilite:
+ - markdown.extensions.meta:
+ - markdown.extensions.tables:
+ - markdown.extensions.toc:
+ - pyspelling.filters.html:
+ comments: false
+ ignores:
+ - code
+ - pre
+ - tt
+ - img
+ - a
+ - table
+ - thead
+ - tbody
+ - th
+ - tr
+ - td
+ - pyspelling.filters.context:
+ context_visible_first: true
+ delimiters:
+ # Ignore code blocks
+ - open: '```[a-z]*\n'
+ close: '```\n'
+ # Ignore inline code
+ - open: '`'
+ close: '`'
+ # Ignore links
+ - open: '\[([^]]*)\]'
+ close: '\([^)]*\)'
+ # Ignore HTML comments
+ - open: '<!--'
+ close: '-->'
+ - pyspelling.filters.url:
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 00000000000..b91e23b17c0
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,76 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at . All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000000..9c52182e85a
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,367 @@
+# Contributing to tutorials
+
+We want to make contributing to this project as easy and transparent as
+possible. This file covers how to flag issues, contribute updates to
+existing tutorials, and submit new tutorials.
+
+NOTE: This guide assumes that you have your GitHub account properly
+configured, such as having an SSH key. If this is your first time
+contributing on GitHub, see the [GitHub
+Documentation](https://docs.github.com/en/get-started/quickstart/contributing-to-projects)
+on contributing to projects.
+
+
+# Issues
+
+We use [GitHub Issues](https://github.com/pytorch/tutorials/issues) to
+track public bugs. Please ensure your description is clear and includes
+sufficient instructions to reproduce the issue.
+
+
+# Security Bugs
+
+Facebook has a [bounty program](https://www.facebook.com/whitehat/) for
+the safe disclosure of security bugs. For these types of issues, please
+go through the process outlined on that page and do not file a public
+issue.
+
+# Contributor License Agreement ("CLA")
+
+In order to accept a pull request, you need to submit a CLA. You only
+need to do this once and you will be able to work on all of Facebook's
+open source projects, not just PyTorch.
+
+Complete your CLA here:
+
+
+# License
+
+By contributing to the tutorials, you agree that your contributions will
+be licensed as described in the `LICENSE` file in the root directory of
+this source tree.
+
+
+# Updates to existing tutorials
+
+We welcome your pull requests (PRs) for updates and fixes.
+
+1. If you haven't already, complete the Contributor License Agreement
+ ("CLA").
+1. Fork the repo and create a branch from
+ [`main`](https://github.com/pytorch/tutorials).
+1. Test your code.
+1. Lint your code with a tool such as
+ [Pylint](https://pylint.pycqa.org/en/latest/).
+1. Submit your PR for review.
+
+
+# New Tutorials
+
+There are three types of tutorial content that we host on
+[`pytorch.org/tutorials`](https://github.com/pytorch/tutorials):
+
+* **Interactive tutorials** are authored and submitted as Python files.
+ The build system converts these into Jupyter notebooks and HTML. The
+ code in these tutorials is run every time they are built. To keep
+ these tutorials up and running, all their package dependencies need to
+ be resolved--which makes this type of tutorial more challenging to
+ maintain.
+
+* **Non-interactive tutorials** are authored and submitted as
+ reStructuredText files. The build system only converts them into HTML;
+ the code in them does not run on build. These tutorials are easier to
+ create and maintain but they do not provide an interactive experience.
+
+
+* **Recipes** are tutorials that provide bite-sized, actionable
+ examples of how to use specific features, which differentiates them
+ from full-length tutorials. Recipes can be interactive or
+ non-interactive.
+
+
+# Managing data that is used by your tutorial
+
+Your tutorial might depend on external data, such as pre-trained models,
+training data, or test data. We recommend storing this data in a
+commonly-used storage service, such as Amazon S3, and instructing your
+users to download the data at the beginning of your tutorial.
+
+To download your data, add a function to the [download.py](https://github.com/pytorch/tutorials/blob/main/.jenkins/download_data.py)
+script, following the same pattern as the other download functions.
+Please do not add download logic to the `Makefile`, as it would incur the download overhead for all CI shards.
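+
+As a rough illustration, a download helper added to that script might
+look like the following sketch. The function name, URL, and destination
+path here are hypothetical placeholders, not part of the actual
+`download_data.py`.
+
+```
+# Hypothetical example -- adapt the name, URL, and destination to your tutorial.
+from pathlib import Path
+from urllib.request import urlretrieve
+
+
+def download_my_tutorial_data(dest_dir: str = "_data") -> None:
+    """Download the (hypothetical) archive used by my_new_tutorial.py."""
+    url = "https://example-bucket.s3.amazonaws.com/my_tutorial_data.tar.gz"  # placeholder URL
+    dest = Path(dest_dir) / "my_tutorial_data.tar.gz"
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    if not dest.exists():  # skip the download if the file is already cached
+        urlretrieve(url, dest)
+```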
+
+# Python packages used by your tutorial
+
+If your tutorial has dependencies that are not already defined in
+`requirements.txt`, you should add them to that file. We recommend that
+you use only mature, well-supported packages in your tutorial. Packages
+that are obscure or not well-maintained may break as a result of, for
+example, updates to Python or PyTorch or other packages. If your
+tutorial fails to build in our Continuous Integration (CI) system, we
+might contact you in order to resolve the issue.
+
+
+# Deprecation of tutorials
+
+Under some circumstances, we might deprecate--and subsequently
+archive--a tutorial, removing it from the site. For example, if the
+tutorial breaks in our CI and we are not able to resolve the issue and
+are also not able to reach you, we might archive the tutorial. In these
+situations, resolving the breaking issue would normally be sufficient to
+make the tutorial available again.
+
+Another situation in which a tutorial might be deprecated is if it
+consistently receives low ratings--or sees little usage--from the community. Again,
+if this occurs, we will attempt to contact you.
+
+If we identify, or suspect, that your tutorial--or a package that your
+tutorial uses--has a **security or privacy** issue, we will immediately
+take the tutorial off the site.
+
+
+# Guidance for authoring tutorials and recipes
+
+In this section, we describe the process for creating tutorials and
+recipes for PyTorch.
+
+The first step is to decide which type of tutorial you want to create,
+taking into account how much support you can provide to keep the
+tutorial up-to-date. Ideally, your tutorial should demonstrate PyTorch
+functionality that is not duplicated in other tutorials.
+
+As described earlier, tutorials are resources that provide a holistic
+end-to-end understanding of how to use PyTorch. Recipes are scoped
+examples of how to use specific features; the goal of a recipe is to
+teach readers how to easily leverage features of PyTorch for their
+needs. Tutorials and recipes are always _actionable_. If the material is
+purely informative, consider adding it to the API docs instead.
+
+View our current [full-length tutorials](https://pytorch.org/tutorials/).
+
+To create actionable tutorials, start by identifying _learning
+objectives_, which are the end goals. Working backwards from these
+objectives will help to eliminate extraneous information.
+
+
+## Learning objectives ##
+
+To create the learning objectives, focus on what the user will
+implement. Set expectations by explicitly stating what the recipe will
+cover and what users will implement by the end. Here are some examples:
+
+- Create a custom dataset
+- Integrate a dataset using a library
+- Iterate over samples in the dataset
+- Apply a transform to the dataset
+
+
+## Voice and writing style ##
+
+Write for a global audience with an instructive and directive voice.
+
+- PyTorch has a global audience; use clear, easy-to-understand
+ language. Avoid idioms or other figures of speech.
+- To keep your instructions concise, use
+ [active voice](https://writing.wisc.edu/handbook/style/ccs_activevoice/) as much as possible.
+- For a short guide on the essentials of writing style,
+ [The Elements of Style](https://www.gutenberg.org/files/37134/37134-h/37134-h.htm)
+ is invaluable.
+- For extensive guidance on technical-writing style, the
+ [Google developer documentation style guide](https://developers.google.com/style)
+ is a great resource.
+- Think of the process as similar to creating a (really practical)
+ Medium post.
+
+
+## Structure ##
+
+We recommend that tutorials use the following structure, which guides users through the learning experience and provides appropriate context:
+
+1. Introduction
+1. Motivation: Why is this topic important?
+1. Link to relevant research papers or other background material.
+1. Learning objectives: Clearly state what the tutorial covers and what
+ users will implement by the end. For example: Provide a summary of
+ how the Integrated Gradients feature works and how to implement it
+ using Captum. The
+ [TensorBoard](https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html)
+ tutorial provides a good example of how to specify learning
+ objectives.
+1. Setup and requirements. Call out any required setup or data
+ downloads.
+1. Step-by-step instructions. Ideally, the steps in the tutorial should
+ map back to the learning objectives. Consider adding comments in the
+ code that correspond to these steps and that help to clarify what
+ each section of the code is doing.
+1. Link to relevant [PyTorch
+ documentation](https://pytorch.org/docs/stable/index.html). This
+ helps readers have context for the tutorial source code and better
+ understand how and why it implements the technique you’re
+ demonstrating.
+1. Recap/Conclusion: Summarize the steps and concepts covered. Highlight
+ key takeaways.
+1. (Optional) Additional practice exercises for users to test their
+ knowledge. An example is [NLP From Scratch: Generating Names with a
+ Character-Level RNN tutorial](https://pytorch.org/tutorials/intermediate/char_rnn_generation_tutorial.html#exercises).
+1. Additional resources for more learning, such as documentation, other
+ tutorials, or relevant research.
+
+
+## Example Tutorials ##
+
+The following tutorials do a good job of demonstrating the ideas
+described in the preceding sections:
+
+- [Chatbot Tutorial](https://pytorch.org/tutorials/beginner/chatbot_tutorial.html)
+- [Tensorboard Tutorial](https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html)
+- [NLP From Scratch: Generating Names with a Character-Level RNN
+Tutorial](https://pytorch.org/tutorials/intermediate/char_rnn_generation_tutorial.html)
+
+If you are creating a recipe, [this is a good
+example.](https://github.com/pytorch/tutorials/blob/main/recipes_source/recipes/what_is_state_dict.py)
+
+
+# Submission Process #
+
+Submit your tutorial as either a Python (`.py`) file or a
+reStructuredText (`.rst`) file. For Python files, the filename for your
+tutorial should end in "`_tutorial.py`"; for example,
+"`cool_pytorch_feature_tutorial.py`".
+
+Do not submit a Jupyter notebook. If you develop your tutorial in
+Jupyter, you'll need to convert it to Python. This
+[script](https://gist.github.com/chsasank/7218ca16f8d022e02a9c0deb94a310fe)
+is one option for performing this conversion.
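+
+If you prefer to do the conversion programmatically, here is a small
+sketch using the `nbconvert` Python API (assuming `nbconvert` and
+`nbformat` are installed; the filenames are placeholders):
+
+```
+# Hypothetical conversion sketch using nbconvert's Python API.
+import nbformat
+from nbconvert import PythonExporter
+
+notebook = nbformat.read("my_draft.ipynb", as_version=4)  # load the notebook
+source, _ = PythonExporter().from_notebook_node(notebook)  # convert to plain Python
+with open("my_new_tutorial.py", "w") as f:
+    f.write(source)
+```
+
+You will still need to edit the result by hand before submitting it.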
+
+For Python files, our CI system runs your code during each build.
+
+
+## Add Your Tutorial Code ##
+
+1. [Fork and
+ clone](https://docs.github.com/en/get-started/quickstart/contributing-to-projects)
+ the repo:
+ [https://github.com/pytorch/tutorials](https://github.com/pytorch/tutorials)
+
+1. Put the tutorial in one of the
+ [`beginner_source`](https://github.com/pytorch/tutorials/tree/main/beginner_source),
+ [`intermediate_source`](https://github.com/pytorch/tutorials/tree/main/intermediate_source),
+ [`advanced_source`](https://github.com/pytorch/tutorials/tree/main/advanced_source)
+ based on the technical level of the content. For recipes, put the
+ recipe in
+ [`recipes_source`](https://github.com/pytorch/tutorials/tree/main/recipes_source).
+ In addition, for recipes, add the recipe in the recipes
+ [README.txt](https://github.com/pytorch/tutorials/blob/main/recipes_source/recipes/README.txt)
+ file.
+
+
+## Include Your Tutorial in `index.rst` ##
+
+For your tutorial to appear on the website and in tag search, you need
+to include it in `index.rst` or, for recipes, in `recipes_index.rst`.
+
+1. Open the relevant file
+ [`index.rst`](https://github.com/pytorch/tutorials/blob/main/index.rst)
+ or
+ [`recipes_index.rst`](https://github.com/pytorch/tutorials/blob/main/recipes_index.rst)
+1. Add a _card_ in reStructuredText format similar to the following:
+
+```
+.. customcarditem::
+ :header: Learn the Basics # Tutorial title
+ :card_description: A step-by-step guide to building a complete ML workflow with PyTorch. # Short description
+ :image: _static/img/thumbnails/cropped/60-min-blitz.png # Image that appears with the card
+ :link: beginner/basics/intro.html
+ :tags: Getting-Started
+```
+
+
+### Link ###
+
+The `link` should be the path to your tutorial in the source tree. For
+example, if the tutorial is in `beginner_source`, the link will be
+`beginner_source/rest/of/the/path.html`.
+
+
+### Tags ###
+
+Choose tags from the existing tags in the file. Reach out to a project
+maintainer if you need a new tag. Tags must not contain whitespace:
+multi-word tags, such as “Getting Started”, should be hyphenated as
+Getting-Started. Otherwise, the tutorial might fail to build, and the
+cards will not display properly.
+
+
+### Image ###
+
+Add a thumbnail to the
+[`_static/img/thumbnails/cropped`](https://github.com/pytorch/tutorials/tree/main/_static/img/thumbnails/cropped)
+directory. Images that render the best are square--that is, they have
+equal `x` and `y` dimensions--and also have high resolution. [Here is an
+example](https://github.com/pytorch/tutorials/blob/main/_static/img/thumbnails/cropped/loading-data.PNG).
+
+## `toctree` ##
+
+1. Add your tutorial under the corresponding toctree (also in
+ `index.rst`). For example, if you are adding a tutorial that
+ demonstrates the PyTorch ability to process images or video, add it
+ under `Image and Video`:
+
+```
+.. toctree::
+ :maxdepth: 2
+ :includehidden:
+ :hidden:
+ :caption: Image and Video
+
+ intermediate/torchvision_tutorial
+ beginner/my-new-tutorial
+```
+
+
+## Test Your Tutorial Locally ##
+
+The following command builds an HTML version of the tutorial website.
+
+```
+make html-noplot
+```
+
+This command does not run your tutorial code. To build the tutorial in a
+way that executes the code, use `make docs`. However, unless you have a
+GPU-powered machine and a proper PyTorch CUDA setup, running this `make`
+command locally won't work. The continuous integration (CI) system will
+test your tutorial when you submit your PR.
+
+
+## Submit the PR ##
+
+NOTE: Please do not use [ghstack](https://github.com/ezyang/ghstack). We
+do not support ghstack in the [`pytorch/tutorials`](https://github.com/pytorch/tutorials) repo.
+
+Submit the changes as a PR to the main branch of
+[`pytorch/tutorials`](https://github.com/pytorch/tutorials).
+
+1. Add your changes, commit, and push:
+
+ ```
+ git add -A
+ git commit -m "Add "
+ git push --set-upstream origin mybranch
+ ```
+
+1. Submit the PR and tag individuals on the PyTorch project who can review
+ your PR.
+1. Address all feedback comments from your reviewers.
+1. Make sure all CI checks are passing.
+
+Once you submit your PR, you can see a generated Netlify preview of your
+build. You can see an example Netlify preview at the following URL:
+
+>
+
+
+## Do not merge the PR yourself ##
+
+Please **DO NOT MERGE** your own PR; if you do, the tutorial will not be published. To avoid potential build breaks on the tutorials site, only certain maintainers can authorize publishing.
diff --git a/Creating Extensions using FFI.md b/Creating Extensions using FFI.md
deleted file mode 100644
index e5a08e248df..00000000000
--- a/Creating Extensions using FFI.md
+++ /dev/null
@@ -1,115 +0,0 @@
-# Custom C extensions for pytorch
-
-## Step 1. prepare your C code
-
-First, you have to write your C functions.
-
-Below you can find an example implementation of forward and backward functions of a module that adds its both inputs.
-
-In your `.c` files you can include TH using an `#include ` directive, and THC using `#include `.
-
-ffi utils will make sure a compiler can find them during the build.
-
-```C
-/* src/my_lib.c */
-#include
-
-int my_lib_add_forward(THFloatTensor *input1, THFloatTensor *input2,
-THFloatTensor *output)
-{
- if (!THFloatTensor_isSameSizeAs(input1, input2))
- return 0;
- THFloatTensor_resizeAs(output, input1);
- THFloatTensor_add(output, input1, input2);
- return 1;
-}
-
-int my_lib_add_backward(THFloatTensor *grad_output, THFloatTensor *grad_input)
-{
- THFloatTensor_resizeAs(grad_input, grad_output);
- THFloatTensor_fill(grad_input, 1);
- return 1;
-}
-```
-
-There are no constraints on the code, except that you will have to prepare a single header,
-which will list all functions want to call from python.
-
-It will be used by the ffi utils to generate appropriate wrappers.
-
-```C
-/* src/my_lib.h */
-int my_lib_add_forward(THFloatTensor *input1, THFloatTensor *input2, THFloatTensor *output);
-int my_lib_add_backward(THFloatTensor *grad_output, THFloatTensor *grad_input);
-```
-
-Now, you'll need a super short file, that will build your custom extension:
-
-```python
-# build.py
-from torch.utils.ffi import create_extension
-ffi = create_extension(
-name='_ext.my_lib',
-headers='src/my_lib.h',
-sources=['src/my_lib.c'],
-with_cuda=False
-)
-ffi.build()
-```
-
-## Step 2: Include it in your Python code
-
-After you run it, pytorch will create an `_ext` directory and put `my_lib` inside.
-
-Package name can have an arbitrary number of packages preceding the final module name (including none).
-If the build succeeded you can import your extension just like a regular python file.
-
-```python
-# functions/add.py
-import torch
-from torch.autograd import Function
-from _ext import my_lib
-
-
-class MyAddFunction(Function):
- def forward(self, input1, input2):
- output = torch.FloatTensor()
- my_lib.my_lib_add_forward(input1, input2, output)
- return output
-
- def backward(self, grad_output):
- grad_input = torch.FloatTensor()
- my_lib.my_lib_add_backward(grad_output, grad_input)
- return grad_input
-```
-
-```python
-# modules/add.py
-from torch.nn import Module
-from functions.add import MyAddFunction
-
-class MyAddModule(Module):
- def forward(self, input1, input2):
- return MyAddFunction()(input1, input2)
-```
-
-```python
-# main.py
-import torch.nn as nn
-from torch.autograd import Variable
-from modules.add import MyAddModule
-
-class MyNetwork(nn.Module):
- def __init__(self):
- super(MyNetwork, self).__init__(
- add=MyAddModule(),
- )
-
- def forward(self, input1, input2):
- return self.add(input1, input2)
-
-model = MyNetwork()
-input1, input2 = Variable(torch.randn(5, 5)), Variable(torch.randn(5, 5))
-print(model(input1, input2))
-print(input1 + input2)
-```
diff --git a/Creating extensions using numpy and scipy.ipynb b/Creating extensions using numpy and scipy.ipynb
deleted file mode 100644
index 8112cd1360e..00000000000
--- a/Creating extensions using numpy and scipy.ipynb
+++ /dev/null
@@ -1,259 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Creating extensions using numpy and scipy\n",
- "\n",
- "In this notebook, we shall go through two tasks:\n",
- "\n",
- "1. Create a neural network layer with no parameters. \n",
- " - This calls into **numpy** as part of it's implementation\n",
- "2. Create a neural network layer that has learnable weights\n",
- " - This calls into **SciPy** as part of it's implementation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "from torch.autograd import Function\n",
- "from torch.autograd import Variable"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Parameter-less example\n",
- "\n",
- "This layer doesn't particularly do anything useful or mathematically correct.\n",
- "\n",
- "It is aptly named BadFFTFunction\n",
- "\n",
- "**Layer Implementation**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "from numpy.fft import rfft2, irfft2\n",
- "class BadFFTFunction(Function):\n",
- " \n",
- " def forward(self, input):\n",
- " numpy_input = input.numpy()\n",
- " result = abs(rfft2(numpy_input))\n",
- " return torch.FloatTensor(result)\n",
- " \n",
- " def backward(self, grad_output):\n",
- " numpy_go = grad_output.numpy()\n",
- " result = irfft2(numpy_go)\n",
- " return torch.FloatTensor(result)\n",
- "\n",
- "# since this layer does not have any parameters, we can\n",
- "# simply declare this as a function, rather than as an nn.Module class\n",
- "def incorrect_fft(input):\n",
- " return BadFFTFunction()(input)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Example usage of the created layer:**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- " 6.9997 11.0343 9.7395 6.0724 6.0526\n",
- " 7.0250 11.4841 7.1110 5.6337 8.6441\n",
- " 7.8062 10.9281 9.8279 23.4972 7.4842\n",
- " 6.4962 4.5987 0.7936 3.9360 4.9595\n",
- " 9.7913 10.3732 1.6261 2.0874 14.5295\n",
- " 6.4962 5.7111 1.9793 8.8037 4.9595\n",
- " 7.8062 8.7752 6.4442 14.1250 7.4842\n",
- " 7.0250 5.4642 1.7983 4.4346 8.6441\n",
- "[torch.FloatTensor of size 8x5]\n",
- "\n",
- "Variable containing:\n",
- "-0.0129 0.0330 0.0036 -0.0737 0.2354 -0.0737 0.0036 0.0330\n",
- " 0.0542 0.0986 -0.0382 -0.1137 -0.0944 -0.0973 -0.0172 -0.0021\n",
- "-0.1538 -0.1444 0.0356 0.1590 0.0588 -0.0188 -0.0611 0.0346\n",
- " 0.1511 0.0370 -0.2513 -0.1518 0.1513 -0.2312 -0.0896 -0.1450\n",
- "-0.1668 -0.0814 0.1954 0.1405 0.2191 0.1405 0.1954 -0.0814\n",
- " 0.1511 -0.1450 -0.0896 -0.2312 0.1513 -0.1518 -0.2513 0.0370\n",
- "-0.1538 0.0346 -0.0611 -0.0188 0.0588 0.1590 0.0356 -0.1444\n",
- " 0.0542 -0.0021 -0.0172 -0.0973 -0.0944 -0.1137 -0.0382 0.0986\n",
- "[torch.FloatTensor of size 8x8]\n",
- "\n"
- ]
- }
- ],
- "source": [
- "input = Variable(torch.randn(8, 8), requires_grad=True)\n",
- "result = incorrect_fft(input)\n",
- "print(result.data)\n",
- "result.backward(torch.randn(result.size()))\n",
- "print(input.grad)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Parametrized example\n",
- "\n",
- "This implements a layer with learnable weights.\n",
- "\n",
- "It implements the Cross-correlation with a learnable kernel.\n",
- "\n",
- "In deep learning literature, it's confusingly referred to as Convolution.\n",
- "\n",
- "The backward computes the gradients wrt the input and gradients wrt the filter.\n",
- "\n",
- "**Implementation:**\n",
- "\n",
- "*Please Note that the implementation serves as an illustration, and we did not verify it's correctness*"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "from scipy.signal import convolve2d, correlate2d\n",
- "from torch.nn.modules.module import Module\n",
- "from torch.nn.parameter import Parameter\n",
- "\n",
- "class ScipyConv2dFunction(Function):\n",
- " \n",
- " def forward(self, input, filter):\n",
- " result = correlate2d(input.numpy(), filter.numpy(), mode='valid')\n",
- " self.save_for_backward(input, filter)\n",
- " return torch.FloatTensor(result)\n",
- " \n",
- " def backward(self, grad_output):\n",
- " input, filter = self.saved_tensors\n",
- " grad_input = convolve2d(grad_output.numpy(), filter.t().numpy(), mode='full')\n",
- " grad_filter = convolve2d(input.numpy(), grad_output.numpy(), mode='valid')\n",
- " return torch.FloatTensor(grad_input), torch.FloatTensor(grad_filter)\n",
- "\n",
- "\n",
- "class ScipyConv2d(Module):\n",
- " \n",
- " def __init__(self, kh, kw):\n",
- " super(ScipyConv2d, self).__init__()\n",
- " self.filter=Parameter(torch.randn(kh, kw))\n",
- " \n",
- " def forward(self, input):\n",
- " return ScipyConv2dFunction()(input, self.filter)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Example usage: **"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[Parameter containing:\n",
- " 0.0460 0.5052 0.9281\n",
- " 0.8355 1.2642 -0.1283\n",
- " 1.7027 -0.3146 -0.6927\n",
- "[torch.FloatTensor of size 3x3]\n",
- "]\n",
- "Variable containing:\n",
- " 1.4619 -4.0543 0.4391 -0.5423 -4.3719 3.9728 -0.4084 -2.8224\n",
- "-3.6799 -3.9278 4.9932 -3.8952 3.0663 1.6303 2.9775 1.1806\n",
- "-3.1694 2.1434 0.4432 1.6941 1.9344 -0.1196 1.1259 4.3571\n",
- "-0.7934 -1.4610 2.2360 0.6406 0.3729 1.9140 0.2427 0.4298\n",
- "-2.2961 -0.4189 5.6658 0.8090 -1.3030 2.2934 0.7164 -0.0272\n",
- " 1.0649 1.0400 -1.3774 -0.2026 -0.9841 1.7192 3.0843 3.4241\n",
- " 3.2743 -1.8780 -2.3084 0.8508 1.1622 0.6060 2.5559 1.0228\n",
- "-2.3282 -1.1790 -2.4604 -1.9252 -1.3962 1.1054 3.6035 3.1302\n",
- "[torch.FloatTensor of size 8x8]\n",
- "\n",
- "Variable containing:\n",
- " 0.0427 0.7780 1.7383 1.8333 3.8198 0.1135 -3.5576 -4.3994 -0.4354 -0.6021\n",
- " 0.4661 1.2470 2.1080 6.3960 0.6894 -4.5144 -3.2005 -0.2762 0.3508 1.7803\n",
- " 0.8492 0.9083 4.1836 0.6133 -3.4092 -1.8541 0.2254 3.6970 1.0382 0.5031\n",
- " 0.0919 1.7864 1.5422 0.2942 2.0176 1.0741 0.8390 2.6984 2.4786 0.2636\n",
- " 0.2600 0.5248 2.3759 2.1921 -3.4520 -3.2025 2.6008 -0.7395 0.3200 0.0964\n",
- " 0.1632 1.9750 2.5973 -2.0378 -5.2213 1.2097 1.3411 1.6995 -1.4448 -2.6965\n",
- " 0.5332 0.8034 -3.0446 -6.2269 -3.4281 -0.5354 -0.4278 -0.7310 -1.1542 0.7947\n",
- " 0.1243 -1.0476 -2.9011 -5.9247 -2.5209 -3.1030 -4.4343 -2.7956 1.4640 0.0090\n",
- "-0.9033 -0.4323 -2.5873 -1.8884 -1.4657 -1.4747 -0.0032 1.4012 -0.7892 -0.1049\n",
- " 0.0739 -0.7349 -0.3925 -0.9291 -1.1198 0.5321 1.9748 0.1242 -0.4062 0.3108\n",
- "[torch.FloatTensor of size 10x10]\n",
- "\n"
- ]
- }
- ],
- "source": [
- "module = ScipyConv2d(3, 3)\n",
- "print(list(module.parameters()))\n",
- "input = Variable(torch.randn(10, 10), requires_grad=True)\n",
- "output = module(input)\n",
- "print(output)\n",
- "output.backward(torch.randn(8, 8))\n",
- "print(input.grad)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
diff --git a/Deep Learning with PyTorch.ipynb b/Deep Learning with PyTorch.ipynb
deleted file mode 100644
index 28cd2135697..00000000000
--- a/Deep Learning with PyTorch.ipynb
+++ /dev/null
@@ -1,1288 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Deep Learning with PyTorch: A 60-minute Blitz\n",
- "\n",
- "Goal of this tutorial:\n",
- "\n",
- "- Understand PyTorch's Tensor library and neural networks at a high level.\n",
- "- Train a small neural network to classify images\n",
- "\n",
- "*This tutorial assumes that you have a basic familiarity of numpy*\n",
- "\n",
- "\n",
- "**Note:** Make sure you have the [torch](https://github.com/pytorch/pytorch) and [torchvision](https://github.com/pytorch/vision) packages installed.\n",
- "\n",
- "\n",
- "### What is PyTorch?\n",
- "\n",
- "It's a Python based scientific computing package targeted at two sets of audiences:\n",
- "\n",
- "- A replacement for numpy to use the power of GPUs\n",
- "- a deep learning research platform that provides maximum flexibility and speed\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Getting Started\n",
- "\n",
- "#### Tensors\n",
- "Tensors are similar to numpy's ndarrays, with the addition being that Tensors can also be used on a GPU to accelerate computing."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "from __future__ import print_function\n",
- "import torch"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "x = torch.Tensor(5, 3) # construct a 5x3 matrix, uninitialized"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x = torch.rand(5, 3) # construct a randomly initialized matrix"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x.size()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "*NOTE: `torch.Size` is in fact a tuple, so it supports the same operations*"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "y = torch.rand(5, 3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# addition: syntax 1\n",
- "x + y"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# addition: syntax 2\n",
- "torch.add(x, y)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# addition: giving an output tensor\n",
- "result = torch.Tensor(5, 3)\n",
- "torch.add(x, y, out=result)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# addition: in-place\n",
- "y.add_(x) # adds x to y"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "> **Note: ** Any operation that mutates a tensor in-place is post-fixed with an `_`\n",
- "> \n",
- "> For example: `x.copy_(y)`, `x.t_()`, will change `x`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# standard numpy-like indexing with all bells and whistles\n",
- "x[:,1]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Read later:\n",
- "\n",
- "100+ Tensor operations, including transposing, indexing, slicing, \n",
- "mathematical operations, linear algebra, random numbers, etc.\n",
- "\n",
- "http://pytorch.org/docs/torch.html"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Numpy Bridge\n",
- "\n",
- "Converting a torch Tensor to a numpy array and vice versa is a breeze.\n",
- "\n",
- "The torch Tensor and numpy array will share their underlying memory locations, and changing one will change the other.\n",
- "\n",
- "#### Converting torch Tensor to numpy Array"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "a = torch.ones(5)\n",
- "a"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "b = a.numpy()\n",
- "b"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "a.add_(1)\n",
- "print(a)\n",
- "print(b) # see how the numpy array changed in value"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Converting numpy Array to torch Tensor"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "a = np.ones(5)\n",
- "b = torch.from_numpy(a)\n",
- "np.add(a, 1, out=a)\n",
- "print(a)\n",
- "print(b) # see how changing the np array changed the torch Tensor automatically"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "All the Tensors on the CPU except a CharTensor support converting to NumPy and back.\n",
- "\n",
- "### CUDA Tensors\n",
- "\n",
- "Tensors can be moved onto GPU using the `.cuda` function."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# let us run this cell only if CUDA is available\n",
- "if torch.cuda.is_available():\n",
- " x = x.cuda()\n",
- " y = y.cuda()\n",
- " x + y"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "\n",
- "**Next: Neural Networks with PyTorch.**\n",
- "\n",
- "Central to all neural networks in PyTorch is the `autograd` package.\n",
- "Let's first briefly visit this, and we will then go to training our first neural network.\n",
- "\n",
- "## Autograd: automatic differentiation\n",
- "\n",
- "The `autograd` package provides automatic differentiation for all operations on Tensors. \n",
- "It is a define-by-run framework, which means that your backprop is defined by how your code is run, and that every single iteration can be different. \n",
- "\n",
- "Let us see this in more simple terms with some examples.\n",
- "\n",
- "`autograd.Variable` is the central class of the package. \n",
- "It wraps a Tensor, and supports nearly all of operations defined on it. Once you finish your computation you can call `.backward()` and have all the gradients computed automatically.\n",
- "\n",
- "You can access the raw tensor through the `.data` attribute, while the gradient w.r.t. this variable is accumulated into `.grad`.\n",
- "\n",
- "\n",
- "\n",
- "There's one more class which is very important for autograd implementation - a `Function`. \n",
- "\n",
- "`Variable` and `Function` are interconnected and build up an acyclic graph, that encodes a complete history of computation. Each variable has a `.creator` attribute that references a `Function` that has created the `Variable` (except for Variables created by the user - their `creator is None`).\n",
- "\n",
- "If you want to compute the derivatives, you can call `.backward()` on a `Variable`. \n",
- "If `Variable` is a scalar (i.e. it holds a one element data), you don't need to specify any arguments to `backward()`, however if it has more elements, you need to specify a `grad_output` argument that is a tensor of matching shape.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from torch.autograd import Variable"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x = Variable(torch.ones(2, 2), requires_grad = True)\n",
- "x"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "y = x + 2\n",
- "y"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "y.creator\n",
- "# y was created as a result of an operation, \n",
- "# so it has a creator"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "z = y * y * 3\n",
- "z"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "out = z.mean()\n",
- "out"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# let's backprop now\n",
- "out.backward()\n",
- "\n",
- "# out.backward() is equivalent to doing out.backward(torch.Tensor([1.0]))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# print gradients d(out)/dx\n",
- "x.grad"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You should have got a matrix of `4.5`.\n",
- "Let's call the `out` *Variable* \"$o$\". \n",
- "We have that $o = \\frac{1}{4}\\sum_i z_i$, $z_i = 3(x_i+2)^2$ and $z_i\\bigr\\rvert_{x_i=1} = 27$. Therefore, $\\frac{\\partial o}{\\partial x_i} = \\frac{3}{2}(x_i+2)$, hence $\\frac{\\partial o}{\\partial x_i}\\bigr\\rvert_{x_i=1} = \\frac{9}{2} = 4.5$."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "> **You can do many crazy things with autograd:**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "x = torch.randn(3)\n",
- "x = Variable(x, requires_grad = True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "y = x * 2\n",
- "while y.data.norm() < 1000:\n",
- " y = y * 2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "y"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "gradients = torch.FloatTensor([0.1, 1.0, 0.0001])\n",
- "y.backward(gradients)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x.grad"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "##### Read Later:\n",
- "> You can read more documentation on `Variable` and `Function` here: [pytorch.org/docs/autograd.html](http://pytorch.org/docs/autograd.html)\n",
- "\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Neural Networks\n",
- "Neural networks can be constructed using the `torch.nn` package.\n",
- "\n",
- "Now that you had a glimpse of `autograd`, `nn` depends on `autograd` to define models and differentiate them.\n",
- "\n",
- "An `nn.Module` contains layers, and a method `forward(input)`that returns the `output`.\n",
- "\n",
- "For example, look at this network that classfies digit images:\n",
- "\n",
- "\n",
- "\n",
- "It is a simple feed-forward network.\n",
- "It takes the input, feeds it through several layers one after the other, and then finally gives the output.\n",
- "\n",
- "A typical training procedure for a neural network is as follows:\n",
- "- define the neural network that has some learnable parameters (or weights)\n",
- "- iterate over a dataset of inputs:\n",
- " - process input through network\n",
- " - compute the loss (how far is the output from being correct)\n",
- " - propagate gradients back into the network's parameters\n",
- " - update the weights of the network\n",
- " - typically using a simple update rule: `weight = weight - learning_rate * gradient`\n",
- " \n",
- "\n",
- "Let's define this network:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import torch.nn as nn\n",
- "import torch.nn.functional as F\n",
- "\n",
- "class Net(nn.Module):\n",
- " def __init__(self):\n",
- " super(Net, self).__init__()\n",
- " self.conv1 = nn.Conv2d(1, 6, 5) # 1 input image channel, 6 output channels, 5x5 square convolution kernel\n",
- " self.conv2 = nn.Conv2d(6, 16, 5)\n",
- " self.fc1 = nn.Linear(16*5*5, 120) # an affine operation: y = Wx + b\n",
- " self.fc2 = nn.Linear(120, 84)\n",
- " self.fc3 = nn.Linear(84, 10)\n",
- "\n",
- " def forward(self, x):\n",
- " x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) # Max pooling over a (2, 2) window\n",
- " x = F.max_pool2d(F.relu(self.conv2(x)), 2) # If the size is a square you can only specify a single number\n",
- " x = x.view(-1, self.num_flat_features(x))\n",
- " x = F.relu(self.fc1(x))\n",
- " x = F.relu(self.fc2(x))\n",
- " x = self.fc3(x)\n",
- " return x\n",
- " \n",
- " def num_flat_features(self, x):\n",
- " size = x.size()[1:] # all dimensions except the batch dimension\n",
- " num_features = 1\n",
- " for s in size:\n",
- " num_features *= s\n",
- " return num_features\n",
- "\n",
- "net = Net()\n",
- "net"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You just have to define the `forward` function, and the `backward` function (where gradients are computed) is automatically defined for you using `autograd`.\n",
- "\n",
- "You can use any of the Tensor operations in the `forward` function.\n",
- "\n",
- "The learnable parameters of a model are returned by `net.parameters()`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "params = list(net.parameters())\n",
- "print(len(params))\n",
- "print(params[0].size()) # conv1's .weight"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The input to the forward is an `autograd.Variable`, and so is the output."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "input = Variable(torch.randn(1, 1, 32, 32))\n",
- "out = net(input)\n",
- "out"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "net.zero_grad() # zeroes the gradient buffers of all parameters\n",
- "out.backward(torch.randn(1, 10)) # backprops with random gradients"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "> #### NOTE: `torch.nn` only supports mini-batches\n",
- "The entire `torch.nn` package only supports inputs that are a mini-batch of samples, and not a single sample. \n",
- "For example, `nn.Conv2d` will take in a 4D Tensor of `nSamples x nChannels x Height x Width`.\n",
- "\n",
- "> *If you have a single sample, just use `input.unsqueeze(0)` to add a fake batch dimension.*"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Recap of all the classes you've seen so far:\n",
- "\n",
- "* `torch.Tensor` - A **multi-dimensional array**.\n",
- "* `autograd.Variable` - **Wraps a Tensor and records the history of operations** applied to it. Has the same API as a `Tensor`, with some additions like `backward()`. Also **holds the gradient** w.r.t. the tensor.\n",
- "* `nn.Module` - Neural network module. **Convenient way of encapsulating parameters**, with helpers for moving them to GPU, exporting, loading, etc.\n",
- "* `nn.Parameter` - A kind of Variable, that is **automatically registered as a parameter when assigned as an attribute to a `Module`**.\n",
- "* `autograd.Function` - Implements **forward and backward definitions of an autograd operation**. Every `Variable` operation, creates at least a single `Function` node, that connects to functions that created a `Variable` and **encodes its history**.\n",
- "\n",
- "##### At this point, we covered:\n",
- "- Defining a neural network\n",
- "- Processing inputs and calling backward.\n",
- "\n",
- "##### Still Left:\n",
- "- Computing the loss\n",
- "- Updating the weights of the network\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "collapsed": true
- },
- "source": [
- "A loss function takes the (output, target) pair of inputs, and computes a value that estimates how far away the output is from the target.\n",
- "\n",
- "There are [several different loss functions under the nn package](http://pytorch.org/docs/nn.html#loss-functions).\n",
- "\n",
- "A simple loss is: `nn.MSELoss` which computes the mean-squared error between the input and the target.\n",
- "\n",
- "For example:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "output = net(input)\n",
- "target = Variable(torch.range(1, 10)) # a dummy target, for example\n",
- "criterion = nn.MSELoss()\n",
- "loss = criterion(output, target)\n",
- "loss"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now, if you follow `loss` in the backward direction, using it's `.creator` attribute, you will see a graph of computations that looks like this:\n",
- "\n",
- "```\n",
- "input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d \n",
- " -> view -> linear -> relu -> linear -> relu -> linear \n",
- " -> MSELoss\n",
- " -> loss\n",
- "```\n",
- "\n",
- "So, when we call `loss.backward()`, the whole graph is differentiated w.r.t. the loss, and all Variables in the graph will have their `.grad` Variable accumulated with the gradient.\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# For illustration, let us follow a few steps backward\n",
- "print(loss.creator) # MSELoss\n",
- "print(loss.creator.previous_functions[0][0]) # Linear\n",
- "print(loss.creator.previous_functions[0][0].previous_functions[0][0]) # ReLU"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# now we shall call loss.backward(), and have a look at conv1's bias gradients before and after the backward.\n",
- "net.zero_grad() # zeroes the gradient buffers of all parameters\n",
- "print('conv1.bias.grad before backward')\n",
- "print(net.conv1.bias.grad)\n",
- "loss.backward()\n",
- "print('conv1.bias.grad after backward')\n",
- "print(net.conv1.bias.grad)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now, we have seen how to use loss functions.\n",
- "\n",
- "##### Read Later:\n",
- "\n",
- "> The neural network package contains various modules and loss functions that form the building blocks of deep neural networks. A full list with documentation is here: http://pytorch.org/docs/nn.html\n",
- "\n",
- "\n",
- "**The only thing left to learn is:**\n",
- "- updating the weights of the network\n",
- "\n",
- "The simplest update rule used in practice is the Stochastic Gradient Descent (SGD):\n",
- "> `weight = weight - learning_rate * gradient`\n",
- "\n",
- "We can implement this using simple python code:\n",
- "\n",
- "```python\n",
- "learning_rate = 0.01\n",
- "for f in net.parameters():\n",
- " f.data.sub_(f.grad.data * learning_rate)\n",
- "```\n",
- "\n",
- "However, as you use neural networks, you want to use various different update rules such as SGD, Nesterov-SGD, Adam, RMSProp, etc.\n",
- "\n",
- "To enable this, we built a small package: `torch.optim` that implements all these methods.\n",
- "Using it is very simple:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import torch.optim as optim\n",
- "# create your optimizer\n",
- "optimizer = optim.SGD(net.parameters(), lr = 0.01)\n",
- "\n",
- "# in your training loop:\n",
- "optimizer.zero_grad() # zero the gradient buffers\n",
- "output = net(input)\n",
- "loss = criterion(output, target)\n",
- "loss.backward()\n",
- "optimizer.step() # Does the update"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This is it.\n",
- "\n",
- "Now you might be thinking,\n",
- "\n",
- "### What about data?\n",
- "Generally, when you have to deal with image, text, audio or video data, you can use standard python packages that load data into a numpy array. Then you can convert this array into a `torch.*Tensor`.\n",
- "\n",
- "- For images, packages such as Pillow, OpenCV are useful. \n",
- "- For audio, packages such as scipy and librosa \n",
- "- For text, either raw Python or Cython based loading, or NLTK and SpaCy are useful.\n",
- "\n",
- "Specifically for `vision`, we have created a package called `torchvision`, that \n",
- "has data loaders for common datasets such as Imagenet, CIFAR10, MNIST, etc. and data transformers for images.\n",
- "This provides a huge convenience and avoids writing boilerplate code.\n",
- "\n",
- "For this tutorial, we will use the CIFAR10 dataset. \n",
- "It has the classes: 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'.\n",
- "The images in CIFAR-10 are of size 3x32x32, i.e. 3-channel color images of 32x32 pixels in size.\n",
- "\n",
- "\n",
- "\n",
- "## Training an image classifier\n",
- "\n",
- "We will do the following steps in order:\n",
- "\n",
- "1. Load and normalizing the CIFAR10 training and test datasets using `torchvision`\n",
- "1. Define a Convolution Neural Network\n",
- "1. Define a loss function\n",
- "1. Train the network on the training data\n",
- "1. Test the network on the test data\n",
- "\n",
- "### 1. Loading and normalizing CIFAR10\n",
- "\n",
- "Using `torchvision`, it's extremely easy to load CIFAR10. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import torchvision\n",
- "import torchvision.transforms as transforms"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "\n",
- "# The output of torchvision datasets are PILImage images of range [0, 1].\n",
- "# We transform them to Tensors of normalized range [-1, 1]\n",
- "transform=transforms.Compose([transforms.ToTensor(),\n",
- " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),\n",
- " ])\n",
- "trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
- "trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, \n",
- " shuffle=True, num_workers=2)\n",
- "\n",
- "testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)\n",
- "testloader = torch.utils.data.DataLoader(testset, batch_size=4, \n",
- " shuffle=False, num_workers=2)\n",
- "classes = ('plane', 'car', 'bird', 'cat',\n",
- " 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "*Let us show some of the training images, for fun.*"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# functions to show an image\n",
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "%matplotlib inline\n",
- "def imshow(img):\n",
- " img = img / 2 + 0.5 # unnormalize\n",
- " npimg = img.numpy()\n",
- " plt.imshow(np.transpose(npimg, (1,2,0)))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# show some random training images\n",
- "dataiter = iter(trainloader)\n",
- "images, labels = dataiter.next()\n",
- "\n",
- "# print images\n",
- "imshow(torchvision.utils.make_grid(images))\n",
- "# print labels\n",
- "print(' '.join('%5s'%classes[labels[j]] for j in range(4)))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 2. Define a Convolution Neural Network\n",
- "\n",
- "**Exercise:** Copy the neural network from the Neural Networks section above and modify it to take 3-channel images (instead of 1-channel images as it was defined).\n",
- "\n",
- "Hint: You only have to change the first layer, change the number 1 to be 3.\n",
- "\n",
- "\n",
- "```\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- ".\n",
- "```\n",
- "\n",
- "**Solution:**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "class Net(nn.Module):\n",
- " def __init__(self):\n",
- " super(Net, self).__init__()\n",
- " self.conv1 = nn.Conv2d(3, 6, 5)\n",
- " self.pool = nn.MaxPool2d(2,2)\n",
- " self.conv2 = nn.Conv2d(6, 16, 5)\n",
- " self.fc1 = nn.Linear(16*5*5, 120)\n",
- " self.fc2 = nn.Linear(120, 84)\n",
- " self.fc3 = nn.Linear(84, 10)\n",
- "\n",
- " def forward(self, x):\n",
- " x = self.pool(F.relu(self.conv1(x)))\n",
- " x = self.pool(F.relu(self.conv2(x)))\n",
- " x = x.view(-1, 16*5*5)\n",
- " x = F.relu(self.fc1(x))\n",
- " x = F.relu(self.fc2(x))\n",
- " x = self.fc3(x)\n",
- " return x\n",
- "\n",
- "net = Net()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 2. Define a Loss function and optimizer"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "criterion = nn.CrossEntropyLoss() # use a Classification Cross-Entropy loss\n",
- "optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### 3. Train the network\n",
- "\n",
- "This is when things start to get interesting.\n",
- "\n",
- "We simply have to loop over our data iterator, and feed the inputs to \n",
- "the network and optimize"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false,
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "for epoch in range(2): # loop over the dataset multiple times\n",
- " \n",
- " running_loss = 0.0\n",
- " for i, data in enumerate(trainloader, 0):\n",
- " # get the inputs\n",
- " inputs, labels = data\n",
- " \n",
- " # wrap them in Variable\n",
- " inputs, labels = Variable(inputs), Variable(labels)\n",
- " \n",
- " # zero the parameter gradients\n",
- " optimizer.zero_grad()\n",
- " \n",
- " # forward + backward + optimize\n",
- " outputs = net(inputs)\n",
- " loss = criterion(outputs, labels)\n",
- " loss.backward() \n",
- " optimizer.step()\n",
- " \n",
- " # print statistics\n",
- " running_loss += loss.data[0]\n",
- " if i % 2000 == 1999: # print every 2000 mini-batches\n",
- " print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss / 2000))\n",
- " running_loss = 0.0\n",
- "print('Finished Training')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We have trained the network for 2 passes over the training dataset. \n",
- "But we need to check if the network has learnt anything at all.\n",
- "\n",
- "We will check this by predicting the class label that the neural network outputs, and checking it against the ground-truth. If the prediction is correct, we add the sample to the list of correct predictions. \n",
- "\n",
- "Okay, first step. Let us display an image from the test set to get familiar."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "dataiter = iter(testloader)\n",
- "images, labels = dataiter.next()\n",
- "\n",
- "# print images\n",
- "imshow(torchvision.utils.make_grid(images))\n",
- "print('GroundTruth: ', ' '.join('%5s'%classes[labels[j]] for j in range(4)))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Okay, now let us see what the neural network thinks these examples above are:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "outputs = net(Variable(images))\n",
- "\n",
- "# the outputs are energies for the 10 classes. \n",
- "# Higher the energy for a class, the more the network \n",
- "# thinks that the image is of the particular class\n",
- "\n",
- "# So, let's get the index of the highest energy\n",
- "_, predicted = torch.max(outputs.data, 1)\n",
- "\n",
- "print('Predicted: ', ' '.join('%5s'% classes[predicted[j][0]] for j in range(4)))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The results seem pretty good. \n",
- "\n",
- "Let us look at how the network performs on the whole dataset."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "correct = 0\n",
- "total = 0\n",
- "for data in testloader:\n",
- " images, labels = data\n",
- " outputs = net(Variable(images))\n",
- " _, predicted = torch.max(outputs.data, 1)\n",
- " total += labels.size(0)\n",
- " correct += (predicted == labels).sum()\n",
- "\n",
- "print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "That looks waaay better than chance, which is 10% accuracy (randomly picking a class out of 10 classes). \n",
- "Seems like the network learnt something.\n",
- "\n",
- "Hmmm, what are the classes that performed well, and the classes that did not perform well:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "class_correct = list(0. for i in range(10))\n",
- "class_total = list(0. for i in range(10))\n",
- "for data in testloader:\n",
- " images, labels = data\n",
- " outputs = net(Variable(images))\n",
- " _, predicted = torch.max(outputs.data, 1)\n",
- " c = (predicted == labels).squeeze()\n",
- " for i in range(4):\n",
- " label = labels[i]\n",
- " class_correct[label] += c[i]\n",
- " class_total[label] += 1"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "for i in range(10):\n",
- " print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Okay, so what next?\n",
- "\n",
- "How do we run these neural networks on the GPU?"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Training on the GPU\n",
- "Just like how you transfer a Tensor on to the GPU, you transfer the neural net onto the GPU.\n",
- "\n",
- "This will recursively go over all modules and convert their parameters and buffers to CUDA tensors."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "net.cuda()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Remember that you will have to send the inputs and targets at every step to the GPU too:\n",
- "\n",
- " inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())\n",
- "\n",
- "Why dont I notice MASSIVE speedup compared to CPU? Because your network is realllly small.\n",
- "\n",
- "**Exercise:** Try increasing the width of your network \n",
- "(argument 2 of the first `nn.Conv2d`, and argument 1 of the second `nn.Conv2d` -- they need to be the same number), see what kind of speedup you get."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "\n",
- "#### Goals achieved:\n",
- "\n",
- "- Understanding PyTorch's Tensor library and neural networks at a high level.\n",
- "- Train a small neural network to classify images"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Where do I go next?\n",
- "\n",
- "- [Train neural nets to play video games](https://goo.gl/uGOksc)\n",
- "- [Train a state-of-the-art ResNet network on imagenet](https://github.com/pytorch/examples/tree/master/imagenet)\n",
- "- [Train an face generator using Generative Adversarial Networks](https://github.com/pytorch/examples/tree/master/dcgan)\n",
- "- [Train a word-level language model using Recurrent LSTM networks](https://github.com/pytorch/examples/tree/master/word_language_model)\n",
- "- [More examples](https://github.com/pytorch/examples)\n",
- "- [More tutorials](https://github.com/pytorch/tutorials)\n",
- "- [Discuss PyTorch on the Forums](https://discuss.pytorch.org/)\n",
- "- [Chat with other users on Slack](http://pytorch.slack.com/messages/beginner/)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/Introduction to PyTorch for former Torchies.ipynb b/Introduction to PyTorch for former Torchies.ipynb
deleted file mode 100644
index be6e153eff1..00000000000
--- a/Introduction to PyTorch for former Torchies.ipynb
+++ /dev/null
@@ -1,996 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Introduction to PyTorch for former Torchies\n",
- "\n",
- "In this tutorial, you will learn the following:\n",
- "\n",
- "1. Using torch Tensors, and important difference against (Lua)Torch\n",
- "2. Using the autograd package\n",
- "3. Building neural networks\n",
- " - Building a ConvNet\n",
- " - Building a Recurrent Net\n",
- " - Using multiple GPUs\n",
- "\n",
- "\n",
- "## Tensors \n",
- "\n",
- "Tensors behave almost exactly the same way in PyTorch as they do in Torch."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "a = torch.FloatTensor(10, 20)\n",
- "# creates tensor of size (10 x 20) with uninitialized memory\n",
- "\n",
- "a = torch.randn(10, 20)\n",
- "# initializes a tensor randomized with a normal distribution with mean=0, var=1\n",
- "\n",
- "a.size()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "*NOTE: `torch.Size` is in fact a tuple, so it supports the same operations*\n",
- "\n",
- "### Inplace / Out-of-place\n",
- "\n",
- "The first difference is that ALL operations on the tensor that operate in-place on it will have an `_` postfix.\n",
- "For example, `add` is the out-of-place version, and `add_` is the in-place version."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "a.fill_(3.5)\n",
- "# a has now been filled with the value 3.5\n",
- "\n",
- "b = a.add(4.0)\n",
- "# a is still filled with 3.5\n",
- "# new tensor b is returned with values 3.5 + 4.0 = 7.5"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Some operations like `narrow` do not have in-place versions, and hence, `.narrow_` does not exist. \n",
- "Similarly, some operations like `fill_` do not have an out-of-place version, so `.fill` does not exist.\n",
- "\n",
- "### Zero Indexing\n",
- "\n",
- "Another difference is that Tensors are zero-indexed. (Torch tensors are one-indexed)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "b = a[0,3] # select 1st row, 4th column from a"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Tensors can be also indexed with Python's slicing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "b = a[:,3:5] # selects all rows, 4th column and 5th column from a"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### No camel casing\n",
- "\n",
- "The next small difference is that all functions are now NOT camelCase anymore.\n",
- "For example `indexAdd` is now called `index_add_`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x = torch.ones(5, 5)\n",
- "print(x)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "z = torch.Tensor(5, 2)\n",
- "z[:,0] = 10\n",
- "z[:,1] = 100\n",
- "print(z)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x.index_add_(1, torch.LongTensor([4,0]), z)\n",
- "print(x)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Numpy Bridge\n",
- "\n",
- "Converting a torch Tensor to a numpy array and vice versa is a breeze.\n",
- "The torch Tensor and numpy array will share their underlying memory locations, and changing one will change the other.\n",
- "\n",
- "#### Converting torch Tensor to numpy Array"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "a = torch.ones(5)\n",
- "a"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "b = a.numpy()\n",
- "b"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "a.add_(1)\n",
- "print(a)\n",
- "print(b) # see how the numpy array changed in value"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Converting numpy Array to torch Tensor"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "a = np.ones(5)\n",
- "b = torch.from_numpy(a)\n",
- "np.add(a, 1, out=a)\n",
- "print(a)\n",
- "print(b) # see how changing the np array changed the torch Tensor automatically"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "All the Tensors on the CPU except a CharTensor support converting to NumPy and back.\n",
- "\n",
- "### CUDA Tensors\n",
- "\n",
- "CUDA Tensors are nice and easy in pytorch, and they are much more consistent as well.\n",
- "Transfering a CUDA tensor from the CPU to GPU will retain it's type."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# let us run this cell only if CUDA is available\n",
- "if torch.cuda.is_available():\n",
- " # creates a LongTensor and transfers it \n",
- " # to GPU as torch.cuda.LongTensor\n",
- " a = torch.LongTensor(10).fill_(3).cuda()\n",
- " print(type(a))\n",
- " b = a.cpu()\n",
- " # transfers it to CPU, back to \n",
- " # being a torch.LongTensor"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Autograd\n",
- "\n",
- "Autograd is now a core torch package for automatic differentiation. \n",
- "\n",
- "It uses a tape based system for automatic differentiation. \n",
- "\n",
- "In the forward phase, the autograd tape will remember all the operations it executed, and in the backward phase, it will replay the operations.\n",
- "\n",
- "In autograd, we introduce a `Variable` class, which is a very thin wrapper around a `Tensor`. \n",
- "You can access the raw tensor through the `.data` attribute, and after computing the backward pass, a gradient w.r.t. this variable is accumulated into `.grad` attribute.\n",
- "\n",
- "\n",
- "\n",
- "There's one more class which is very important for autograd implementation - a `Function`. `Variable` and `Function` are interconnected and build up an acyclic graph, that encodes a complete history of computation. Each variable has a `.creator` attribute that references a function that has created a function (except for Variables created by the user - these have `None` as `.creator`).\n",
- "\n",
- "If you want to compute the derivatives, you can call `.backward()` on a `Variable`. \n",
- "If `Variable` is a scalar (i.e. it holds a one element tensor), you don't need to specify any arguments to `backward()`, however if it has more elements, you need to specify a `grad_output` argument that is a tensor of matching shape."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "from torch.autograd import Variable\n",
- "x = Variable(torch.ones(2, 2), requires_grad = True)\n",
- "x # notice the \"Variable containing\" line"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x.data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x.grad"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x.creator is None # we've created x ourselves"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "y = x + 2\n",
- "y"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "y.creator\n",
- "# y was created as a result of an operation, \n",
- "# so it has a creator"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "z = y * y * 3\n",
- "z"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "out = z.mean()\n",
- "out"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# let's backprop now\n",
- "out.backward()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# print gradients d(out)/dx\n",
- "x.grad"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "By default, gradient computation flushes all the internal buffers contained in the graph, so if you even want to do the backward on some part of the graph twice, you need to pass in `retain_variables = True` during the first pass."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "x = Variable(torch.ones(2, 2), requires_grad = True)\n",
- "y = x * x\n",
- "y.backward(torch.ones(2, 2), retain_variables=True)\n",
- "# the retain_variables flag will prevent the internal buffers from being freed\n",
- "x.grad"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# just backproping random gradients\n",
- "gradient = torch.randn(2, 2)\n",
- "\n",
- "# this would fail if we didn't specify \n",
- "# that we want to retain variables\n",
- "y.backward(gradient)\n",
- "\n",
- "x.grad"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## nn package"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import torch.nn as nn"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We've redesigned the nn package, so that it's fully integrated with autograd.\n",
- "\n",
- "### Replace containers with autograd\n",
- "\n",
- "You no longer have to use Containers like ConcatTable, or modules like CAddTable, or use and debug with nngraph. \n",
- "We will seamlessly use autograd to define our neural networks.\n",
- "For example, \n",
- "\n",
- "`output = nn.CAddTable():forward({input1, input2})` simply becomes `output = input1 + input2`\n",
- "\n",
- "`output = nn.MulConstant(0.5):forward(input)` simply becomes `output = input * 0.5`\n",
- "\n",
- "### State is no longer held in the module, but in the network graph\n",
- "\n",
- "Using recurrent networks should be simpler because of this reason. If you want to create a recurrent network, simply use the same Linear layer multiple times, without having to think about sharing weights.\n",
- "\n",
- "\n",
- "\n",
- "### Simplified debugging\n",
- "\n",
- "Debugging is intuitive using Python's pdb debugger, and **the debugger and stack traces stop at exactly where an error occurred.** What you see is what you get.\n",
- "\n",
- "### Example 1: ConvNet\n",
- "\n",
- "Let's see how to create a small ConvNet. \n",
- "\n",
- "All of your networks are derived from the base class `nn.Module`.\n",
- "\n",
- "- In the constructor, you declare all the layers you want to use.\n",
- "- In the forward function, you define how your model is going to be run, from input to output"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import torch.nn.functional as F\n",
- "\n",
- "class MNISTConvNet(nn.Module):\n",
- " def __init__(self):\n",
- " # this is the place where you instantiate all your modules\n",
- " # you can later access them using the same names you've given them in here\n",
- " super(MNISTConvNet, self).__init__()\n",
- " self.conv1 = nn.Conv2d(1, 10, 5)\n",
- " self.pool1 = nn.MaxPool2d(2,2)\n",
- " self.conv2 = nn.Conv2d(10, 20, 5)\n",
- " self.pool2 = nn.MaxPool2d(2, 2)\n",
- " self.fc1 = nn.Linear(320, 50)\n",
- " self.fc2 = nn.Linear(50, 10)\n",
- " \n",
- " # it's the forward function that defines the network structure\n",
- " # we're accepting only a single input in here, but if you want,\n",
- " # feel free to use more\n",
- " def forward(self, input):\n",
- " x = self.pool1(F.relu(self.conv1(input)))\n",
- " x = self.pool2(F.relu(self.conv2(x)))\n",
- "\n",
- " # in your model definition you can go full crazy and use arbitrary\n",
- " # python code to define your model structure\n",
- " # all these are perfectly legal, and will be handled correctly \n",
- " # by autograd:\n",
- " # if x.gt(0) > x.numel() / 2:\n",
- " # ...\n",
- " # \n",
- " # you can even do a loop and reuse the same module inside it\n",
- " # modules no longer hold ephemeral state, so you can use them\n",
- " # multiple times during your forward pass \n",
- " # while x.norm(2) < 10:\n",
- " # x = self.conv1(x) \n",
- " \n",
- " x = x.view(x.size(0), -1)\n",
- " x = F.relu(self.fc1(x))\n",
- " x = F.relu(self.fc2(x))\n",
- " return x"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's use the defined ConvNet now. \n",
- "You create an instance of the class first."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "net = MNISTConvNet()\n",
- "print(net)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "> #### NOTE: `torch.nn` only supports mini-batches\n",
- "The entire `torch.nn` package only supports inputs that are a mini-batch of samples, and not a single sample. \n",
- "For example, `nn.Conv2d` will take in a 4D Tensor of `nSamples x nChannels x Height x Width`.\n",
- "\n",
- "> *If you have a single sample, just use `input.unsqueeze(0)` to add a fake batch dimension.*"
- ]
- },
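- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "For instance, a minimal sketch (a hypothetical single-channel 28x28 image named `sample`):\n",
- "\n",
- "```python\n",
- "sample = torch.randn(1, 28, 28)   # one channel, 28 x 28 pixels\n",
- "batch = sample.unsqueeze(0)       # shape becomes 1 x 1 x 28 x 28\n",
- "print(batch.size())\n",
- "```"
- ]
- },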
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# create a mini-batch containing a single sample of random data\n",
- "input = Variable(torch.randn(1, 1, 28, 28))\n",
- "\n",
- "# send the sample through the ConvNet\n",
- "out = net(input)\n",
- "print(out.size())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false,
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "# define a dummy target label\n",
- "target = Variable(torch.LongTensor([3]))\n",
- "\n",
- "# create a loss function\n",
- "loss_fn = nn.CrossEntropyLoss() # LogSoftmax + ClassNLL Loss\n",
- "err = loss_fn(out, target)\n",
- "print(err)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "err.backward()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The output of the ConvNet `out` is a `Variable`. We compute the loss using that, and that results in `err` which is also a `Variable`.\n",
- "\n",
- "Calling `.backward` on `err` hence will propagate gradients all the way through the ConvNet to it's weights"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "##### Let's access individual layer weights and gradients"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "print(net.conv1.weight.grad.size())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "print(net.conv1.weight.data.norm()) # norm of the weight\n",
- "print(net.conv1.weight.grad.data.norm()) # norm of the gradients"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Forward and Backward Function Hooks\n",
- "We've inspected the weights and the gradients. \n",
- "But how about inspecting / modifying the output and grad_output of a layer?\n",
- "\n",
- "We introduce **hooks** for this purpose.\n",
- "\n",
- "You can register a function on a *Module* or a *Variable*. \n",
- "The hook can be a forward hook or a backward hook. \n",
- "The forward hook will be executed when a forward call is executed. \n",
- "The backward hook will be executed in the backward phase. \n",
- "Let's look at an example.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# We register a forward hook on conv2 and print some information\n",
- "def printnorm(self, input, output):\n",
- " # input is a tuple of packed inputs\n",
- " # output is a Variable. output.data is the Tensor we are interested\n",
- " print('Inside ' + self.__class__.__name__ + ' forward')\n",
- " print('')\n",
- " print('input: ', type(input))\n",
- " print('input[0]: ', type(input[0]))\n",
- " print('output: ', type(output))\n",
- " print('')\n",
- " print('input size:', input[0].size())\n",
- " print('output size:', output.data.size())\n",
- " print('output norm:', output.data.norm())\n",
- "\n",
- "net.conv2.register_forward_hook(printnorm)\n",
- "\n",
- "out = net(input)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "# We register a backward hook on conv2 and print some information\n",
- "def printgradnorm(self, grad_input, grad_output):\n",
- " print('Inside ' + self.__class__.__name__ + ' backward') \n",
- " print('Inside class:' + self.__class__.__name__)\n",
- " print('') \n",
- " print('grad_input: ', type(grad_input))\n",
- " print('grad_input[0]: ', type(grad_input[0]))\n",
- " print('grad_output: ', type(grad_output))\n",
- " print('grad_output[0]: ', type(grad_output[0]))\n",
- " print('') \n",
- " print('grad_input size:', grad_input[0].size())\n",
- " print('grad_output size:', grad_output[0].size())\n",
- " print('grad_input norm:', grad_input[0].data.norm())\n",
- "\n",
- "net.conv2.register_backward_hook(printgradnorm)\n",
- "\n",
- "out = net(input)\n",
- "err = loss_fn(out, target)\n",
- "err.backward()"
- ]
- },
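- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Hooks can also be registered on a `Variable` directly; the hook then receives the gradient of that Variable during the backward pass. A minimal, hedged sketch (the exact signature of `register_hook` may differ between PyTorch versions):\n",
- "\n",
- "```python\n",
- "def print_grad(grad):\n",
- "    print('grad of v:', grad)\n",
- "\n",
- "v = Variable(torch.ones(2, 2), requires_grad=True)\n",
- "v.register_hook(print_grad)\n",
- "out = (v * 3).sum()\n",
- "out.backward()   # print_grad fires here with d(out)/dv\n",
- "```"
- ]
- },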
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A full and working MNIST example is located here\n",
- "https://github.com/pytorch/examples/tree/master/mnist\n",
- "\n",
- "### Example 2: Recurrent Net\n",
- "\n",
- "Next, let's lookm at building recurrent nets with PyTorch.\n",
- "\n",
- "Since the state of the network is held in the graph and not\n",
- "in the layers, you can simply create an nn.Linear and \n",
- "reuse it over and over again for the recurrence."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "class RNN(nn.Module):\n",
- "\n",
- " # you can also accept arguments in your model constructor\n",
- " def __init__(self, data_size, hidden_size, output_size):\n",
- " super(RNN, self).__init__()\n",
- " \n",
- " self.hidden_size = hidden_size\n",
- " input_size = data_size + hidden_size\n",
- " \n",
- " self.i2h = nn.Linear(input_size, hidden_size)\n",
- " self.h2o = nn.Linear(hidden_size, output_size)\n",
- " \n",
- " def forward(self, data, last_hidden):\n",
- " input = torch.cat((data, last_hidden), 1)\n",
- " hidden = self.i2h(input)\n",
- " output = self.h2o(hidden)\n",
- " return hidden, output\n",
- "\n",
- "rnn = RNN(50, 20, 10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "loss_fn = nn.MSELoss()\n",
- "\n",
- "batch_size = 10\n",
- "TIMESTEPS = 5\n",
- "\n",
- "# Create some fake data\n",
- "batch = Variable(torch.randn(batch_size, 50))\n",
- "hidden = Variable(torch.zeros(batch_size, 20))\n",
- "target = Variable(torch.zeros(batch_size, 10))\n",
- "\n",
- "loss = 0\n",
- "for t in range(TIMESTEPS): \n",
- " # yes! you can reuse the same network several times,\n",
- " # sum up the losses, and call backward!\n",
- " hidden, output = rnn(batch, hidden)\n",
- " loss += loss_fn(output, target)\n",
- "loss.backward()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A more complete Language Modeling example using LSTMs and Penn Tree-bank is located here: https://github.com/pytorch/examples/tree/master/word_language_model\n",
- "\n",
- "PyTorch by default has seamless CuDNN integration for ConvNets and Recurrent Nets"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Multi-GPU examples\n",
- "\n",
- "Data Parallelism is when we split the mini-batch of samples into multiple smaller mini-batches and run the computation for each of the smaller mini-batches in parallel.\n",
- "\n",
- "Data Parallelism is implemented using `torch.nn.DataParallel`.\n",
- "\n",
- "One can wrap a Module in `DataParallel` and it will be parallelized over multiple GPUs in the batch dimension."
- ]
- },
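- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In the simplest (hypothetical) case, that is a one-line wrap of an existing module:\n",
- "\n",
- "```python\n",
- "model = nn.Linear(10, 20)\n",
- "model = nn.DataParallel(model)  # forward passes are now split across the visible GPUs\n",
- "```\n",
- "\n",
- "The example below instead wraps only one sub-module of a larger model."
- ]
- },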
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Data Parallel"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "class DataParallelModel(nn.Module):\n",
- " def __init__(self):\n",
- " super().__init__()\n",
- " self.block1=nn.Linear(10, 20)\n",
- " \n",
- " # wrap block2 in DataParallel\n",
- " self.block2=nn.Linear(20, 20)\n",
- " self.block2 = nn.DataParallel(self.block2)\n",
- " \n",
- " self.block3=nn.Linear(20, 20)\n",
- " \n",
- " def forward(self, x):\n",
- " x = self.block1(x)\n",
- " x = self.block2(x)\n",
- " x = self.block3(x)\n",
- " return x"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The code does not need to be changed in CPU-mode.\n",
- "\n",
- "[The documentation for DataParallel is here](http://pytorch.org/docs/nn.html#torch.nn.DataParallel)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Primitives on which data parallel is implemented upon\n",
- "In general, pytorch's nn.parallel primitives can be used independently.\n",
- "We have implemented simple MPI-like primitives:\n",
- "- replicate: replicate a Module on multiple devices\n",
- "- scatter: distribute the input in the first-dimension\n",
- "- gather: gather and concatenate the input in the first-dimension\n",
- "- parallel_apply: apply a set of already-distributed inputs to a set of already-distributed models.\n",
- "\n",
- "To give a better clarity, here function `data_parallel` composed using these collectives"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def data_parallel(module, input, device_ids, output_device=None):\n",
- " if not device_ids:\n",
- " return module(input)\n",
- "\n",
- " if output_device is None:\n",
- " output_device = device_ids[0]\n",
- "\n",
- " replicas = nn.parallel.replicate(module, device_ids)\n",
- " inputs = nn.parallel.scatter(input, device_ids)\n",
- " replicas = replicas[:len(inputs)]\n",
- " outputs = nn.parallel.parallel_apply(replicas, inputs)\n",
- " return nn.parallel.gather(outputs, output_device)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Part of the model on CPU and part on the GPU\n",
- "\n",
- "Let's look at a small example of implementing a network where part of it is on the CPU and part on the GPU\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "class DistributedModel(nn.Module):\n",
- " def __init__(self):\n",
- " super().__init__(\n",
- " embedding=nn.Embedding(1000, 10),\n",
- " rnn=nn.Linear(10, 10).cuda(0),\n",
- " )\n",
- " \n",
- " def forward(self, x):\n",
- " # Compute embedding on CPU\n",
- " x = self.embedding(x)\n",
- " \n",
- " # Transfer to GPU\n",
- " x = x.cuda(0)\n",
- " \n",
- " # Compute RNN on GPU\n",
- " x = self.rnn(x)\n",
- " return x"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This was a small introduction to PyTorch for former Torch users.\n",
- "\n",
- "There's a lot more to learn.\n",
- "\n",
- "Look at our more comprehensive introductory tutorial which introduces the `optim` package, data loaders etc.: [Deep Learning with PyTorch: a 60-minute blitz](https://github.com/pytorch/tutorials/blob/master/Deep%20Learning%20with%20PyTorch.ipynb)\n",
- "\n",
- "Also look at \n",
- "\n",
- "- [Train neural nets to play video games](https://goo.gl/uGOksc)\n",
- "- [Train a state-of-the-art ResNet network on imagenet](https://github.com/pytorch/examples/tree/master/imagenet)\n",
- "- [Train an face generator using Generative Adversarial Networks](https://github.com/pytorch/examples/tree/master/dcgan)\n",
- "- [Train a word-level language model using Recurrent LSTM networks](https://github.com/pytorch/examples/tree/master/word_language_model)\n",
- "- [More examples](https://github.com/pytorch/examples)\n",
- "- [More tutorials](https://github.com/pytorch/tutorials)\n",
- "- [Discuss PyTorch on the Forums](https://discuss.pytorch.org/)\n",
- "- [Chat with other users on Slack](pytorch.slack.com/messages/beginner/)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000000..338dffbfe74
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2017-2022, Pytorch contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000000..7fcf1de6636
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,96 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# Locale
+export LC_ALL=C
+
+# You can set these variables from the command line.
+SPHINXOPTS ?=
+SPHINXBUILD = sphinx-build
+SPHINXPROJ = PyTorchTutorials
+SOURCEDIR = .
+BUILDDIR = _build
+DATADIR = _data
+GH_PAGES_SOURCES = $(SOURCEDIR) Makefile
+
+ZIPOPTS ?= -qo
+TAROPTS ?=
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile docs
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -v
+
+download:
+ # IMPORTANT NOTE: Please make sure your dataset is downloaded to *_source/data folder,
+ # otherwise CI might silently break.
+
+ # NOTE: Please consider using the Step1 and one of Step2 for new dataset,
+ # [something] should be replaced with the actual value.
+ # Step1. DOWNLOAD: wget -nv -N [SOURCE_FILE] -P $(DATADIR)
+ # Step2-1. UNZIP: unzip -o $(DATADIR)/[SOURCE_FILE] -d [*_source/data/]
+ # Step2-2. UNTAR: tar -xzf $(DATADIR)/[SOURCE_FILE] -C [*_source/data/]
+ # Step2-3. AS-IS: cp $(DATADIR)/[SOURCE_FILE] [*_source/data/]
+
+ # Run structured downloads first (will also make directories)
+ python3 .jenkins/download_data.py
+
+ # data loader tutorial
+ wget -nv -N https://download.pytorch.org/tutorial/faces.zip -P $(DATADIR)
+ unzip $(ZIPOPTS) $(DATADIR)/faces.zip -d beginner_source/data/
+
+ wget -nv -N https://download.pytorch.org/models/tutorials/4000_checkpoint.tar -P $(DATADIR)
+ cp $(DATADIR)/4000_checkpoint.tar beginner_source/data/
+
+ # neural style images
+ rm -rf advanced_source/data/images/ || true
+ mkdir -p advanced_source/data/images/
+ cp -r _static/img/neural-style/ advanced_source/data/images/
+
+ # Download dataset for beginner_source/hybrid_frontend/introduction_to_hybrid_frontend_tutorial.py
+ wget -nv -N https://s3.amazonaws.com/pytorch-tutorial-assets/iris.data -P $(DATADIR)
+ cp $(DATADIR)/iris.data beginner_source/data/
+
+ # Download dataset for beginner_source/chatbot_tutorial.py
+ wget -nv -N https://s3.amazonaws.com/pytorch-tutorial-assets/cornell_movie_dialogs_corpus_v2.zip -P $(DATADIR)
+ unzip $(ZIPOPTS) $(DATADIR)/cornell_movie_dialogs_corpus_v2.zip -d beginner_source/data/
+
+ # Download PennFudanPed dataset for intermediate_source/torchvision_tutorial.py
+ wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip -P $(DATADIR)
+ unzip -o $(DATADIR)/PennFudanPed.zip -d intermediate_source/data/
+
+download-last-reviewed-json:
+ @echo "Downloading tutorials-review-data.json..."
+ curl -o tutorials-review-data.json https://raw.githubusercontent.com/pytorch/tutorials/refs/heads/last-reviewed-data-json/tutorials-review-data.json
+ @echo "Finished downloading tutorials-review-data.json."
+docs:
+ make download
+ make download-last-reviewed-json
+ make html
+ @python .jenkins/insert_last_verified.py $(BUILDDIR)/html
+ rm -rf docs
+ cp -r $(BUILDDIR)/html docs
+ touch docs/.nojekyll
+ rm -rf tutorials-review-data.json
+
+html-noplot:
+ $(SPHINXBUILD) -D plot_gallery=0 -b html $(SPHINXOPTS) "$(SOURCEDIR)" "$(BUILDDIR)/html"
+ # bash .jenkins/remove_invisible_code_block_batch.sh "$(BUILDDIR)/html"
+ @echo
+ make download-last-reviewed-json
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+ @echo "Running post-processing script to insert 'Last Verified' dates..."
+ @python .jenkins/insert_last_verified.py $(BUILDDIR)/html
+ rm -rf tutorials-review-data.json
+
+clean-cache:
+ make clean
+ rm -rf advanced beginner intermediate recipes
+ # remove additional python files downloaded for torchvision_tutorial.py
+ rm -rf intermediate_source/engine.py intermediate_source/utils.py intermediate_source/transforms.py intermediate_source/coco_eval.py intermediate_source/coco_utils.py
diff --git a/README.md b/README.md
index 9ccb0ce99e2..3b858a3882b 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,79 @@
-PyTorch Tutorials
------------------
+# PyTorch Tutorials
-1. [Deep Learning with PyTorch: a 60-minute blitz](Deep Learning with PyTorch.ipynb)
- - A perfect introduction to PyTorch's torch, autograd, nn and optim APIs
- - If you are a former Torch user, you can check out this instead: [Introduction to PyTorch for former Torchies](Introduction to PyTorch for former Torchies.ipynb)
-2. Custom C extensions
- - [Write your own C code that interfaces into PyTorch via FFI](Creating%20Extensions%20using%20FFI.md)
-3. [Writing your own neural network module that uses numpy and scipy](Creating extensions using numpy and scipy.ipynb)
-4. [Reinforcement (Q-)Learning with PyTorch](Reinforcement (Q-)Learning with PyTorch.ipynb)
+All the tutorials are now presented as sphinx style documentation at:
+
+## [https://pytorch.org/tutorials](https://pytorch.org/tutorials)
+
+# Asking a question
+
+If you have a question about a tutorial, post in https://dev-discuss.pytorch.org/ rather than creating an issue in this repo. Your question will be answered much faster on the dev-discuss forum.
+
+# Submitting an issue
+
+You can submit the following types of issues:
+
+* Feature request - request a new tutorial to be added. Please explain why this tutorial is needed and how it demonstrates PyTorch's value.
+* Bug report - report a failure or outdated information in an existing tutorial. When submitting a bug report, please run: `python3 -m torch.utils.collect_env` to get information about your environment and add the output to the bug report.
+
+# Contributing
+
+We use sphinx-gallery's [notebook styled examples](https://sphinx-gallery.github.io/stable/tutorials/index.html) to create the tutorials. Syntax is very simple. In essence, you write a lightly formatted Python file and it shows up as an HTML page. In addition, a Jupyter notebook is autogenerated and available to run in Google Colab.
+
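+A minimal, hypothetical sketch of what such a file looks like (sphinx-gallery renders the module docstring as the page header, and comment blocks after a `# %%` marker as prose):
+
+```python
+"""
+My New Tutorial
+===============
+
+A one-paragraph description of what this tutorial covers.
+"""
+
+# %%
+# Comment lines after a ``# %%`` marker are rendered as text in the page.
+import torch
+
+x = torch.rand(3, 3)
+print(x)
+```
+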
+Here is how you can create a new tutorial (for a detailed description, see [CONTRIBUTING.md](./CONTRIBUTING.md)):
+
+NOTE: Before submitting a new tutorial, read [PyTorch Tutorial Submission Policy](./tutorial_submission_policy.md).
+
+1. Create a Python file. If you want it executed while inserted into documentation, save the file with the suffix `tutorial` so that the file name is `your_tutorial.py`.
+2. Put it in one of the `beginner_source`, `intermediate_source`, `advanced_source` directories based on the level of difficulty. If it is a recipe, add it to `recipes_source`. For tutorials demonstrating unstable prototype features, add it to `prototype_source`.
+3. For Tutorials (except if it is a prototype feature), include it in the `toctree` directive and create a `customcarditem` in [index.rst](./index.rst).
+4. For Tutorials (except if it is a prototype feature), create a thumbnail in the [index.rst file](https://github.com/pytorch/tutorials/blob/main/index.rst) using a command like `.. customcarditem:: beginner/your_tutorial.html`. For Recipes, create a thumbnail in the [recipes_index.rst](https://github.com/pytorch/tutorials/blob/main/recipes_index.rst)
+
+If you are starting off with a Jupyter notebook, you can use [this script](https://gist.github.com/chsasank/7218ca16f8d022e02a9c0deb94a310fe) to convert the notebook to Python file. After conversion and addition to the project, please make sure that section headings and other things are in logical order.
+
+## Building locally
+
+The tutorial build is very large and requires a GPU. If your machine does not have a GPU device, you can preview your HTML build without actually downloading the data and running the tutorial code:
+
+1. Install required dependencies by running: `pip install -r requirements.txt`.
+
+> Typically, you would run either in `conda` or `virtualenv`. If you want to use `virtualenv`, in the root of the repo, run: `virtualenv venv`, then `source venv/bin/activate`.
+
+- If you have a GPU-powered laptop, you can build using `make docs`. This will download the data, execute the tutorials and build the documentation to `docs/` directory. This might take about 60-120 min for systems with GPUs. If you do not have a GPU installed on your system, then see next step.
+- You can skip the computationally intensive graph generation by running `make html-noplot` to build basic html documentation to `_build/html`. This way, you can quickly preview your tutorial.
+
+## Building a single tutorial
+
+You can build a single tutorial by using the `GALLERY_PATTERN` environment variable. For example to run only `neural_style_transfer_tutorial.py`, run:
+
+```
+GALLERY_PATTERN="neural_style_transfer_tutorial.py" make html
+```
+or
+
+```
+GALLERY_PATTERN="neural_style_transfer_tutorial.py" sphinx-build . _build
+```
+
+The `GALLERY_PATTERN` variable respects regular expressions.
+
+## Spell Check
+You can run `pyspelling` to check for spelling errors in the tutorials. To check only Python files, run `pyspelling -n python`. To check only .rst files, use `pyspelling -n reST`. Currently, .rst spell checking is limited to the `beginner/` directory. Contributions to enable spell checking in other directories are welcome!
+
+
+```
+pyspelling # full check (~3 mins)
+pyspelling -n python # Python files only
+pyspelling -n reST # reST files (only beginner/ dir currently included)
+```
+
+
+## About contributing to PyTorch Documentation and Tutorials
+* You can find information about contributing to PyTorch documentation in the
+PyTorch Repo [README.md](https://github.com/pytorch/pytorch/blob/master/README.md) file.
+* Additional information can be found in [PyTorch CONTRIBUTING.md](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md).
+
+
+## License
+
+PyTorch Tutorials is BSD licensed, as found in the LICENSE file.
diff --git a/Reinforcement (Q-)Learning with PyTorch.ipynb b/Reinforcement (Q-)Learning with PyTorch.ipynb
deleted file mode 100644
index 756e7b6ebd0..00000000000
--- a/Reinforcement (Q-)Learning with PyTorch.ipynb
+++ /dev/null
@@ -1,431 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# PyTorch DQN tutorial\n",
- "\n",
- "This tutorial shows how to use PyTorch to train a DQN agent on the CartPole-v0 task from the [OpenAI Gym](https://gym.openai.com/).\n",
- "\n",
- "### Task\n",
- "\n",
- "The agent has to decide between two actions - moving the cart left or right - so that the pole attached to it stays upright. You can find an official leaderboard with various algorithms and visualizations at the [Gym website](https://gym.openai.com/envs/CartPole-v0).\n",
- "\n",
- "\n",
- "\n",
- "As the agent observes the current state of the environment and chooses an action, the environment *transitions* to a new state, and also returns a reward that indicates the consequences of the action. In this task, the environment terminates if the pole falls over too far.\n",
- "\n",
- "The CartPole task is designed so that the inputs to the agent are 4 real values representing the environment state (position, velocity, etc.). However, neural networks can solve the task purely by looking at the scene, so we'll use a patch of the screen centered on the cart as an input. Because of this, our results aren't directly comparable to the ones from the official leaderboard - our task is much harder. Unfortunately this does slow down the training, because we have to render all the frames.\n",
- "\n",
- "Strictly speaking, we will present the state as the difference between the current screen patch and the previous one. This will allow the agent to take the velocity of the pole into account from one image."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Packages\n",
- "\n",
- "First, let's import needed packages. Firstly, we need [`gym`](https://gym.openai.com/docs) for the environment. We'll also use the following from PyTorch:\n",
- "\n",
- "* neural networks (`torch.nn`)\n",
- "* optimization (`torch.optim`)\n",
- "* automatic differentiation (`torch.autograd`)\n",
- "* utilities for vision tasks (`torchvision` - [a separate package](https://github.com/pytorch/vision))."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "import gym\n",
- "import math\n",
- "import random\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from collections import namedtuple\n",
- "from itertools import count\n",
- "from copy import deepcopy\n",
- "from PIL import Image\n",
- "\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import torch.optim as optim\n",
- "import torch.autograd as autograd\n",
- "import torch.nn.functional as F\n",
- "import torchvision.transforms as T\n",
- "\n",
- "from torch.autograd import Variable\n",
- "\n",
- "env = gym.make('CartPole-v0')\n",
- "env = env.unwrapped"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from IPython import display\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Replay Memory\n",
- "\n",
- "We'll be using experience replay memory for training our DQN. It stores the transitions that the agent observes, allowing us to reuse this data later. By sampling from it randomly, the transitions that build up a batch are decorrelated. It has been shown that this greatly stabilizes and improves the DQN training procedure.\n",
- "\n",
- "For this, we're going to need two classses:\n",
- "\n",
- "* `Transition` - a named tuple representing a single transition in our environment\n",
- "* `ReplayMemory` - a cyclic buffer of bounded size that holds the transitions observed recently. It also implements a `.sample()` method for selecting a random batch of transitions for training."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))\n",
- "\n",
- "class ReplayMemory(object):\n",
- " \n",
- " def __init__(self, capacity):\n",
- " self.capacity = capacity\n",
- " self.memory = []\n",
- " self.position = 0\n",
- " \n",
- " def push(self, *args):\n",
- " \"\"\"Saves a transition.\"\"\"\n",
- " if len(self.memory) < self.capacity:\n",
- " self.memory.append(None) \n",
- " self.memory[self.position] = Transition(*args)\n",
- " self.position = (self.position + 1) % self.capacity\n",
- " \n",
- " def sample(self, batch_size):\n",
- " return random.sample(self.memory, batch_size)\n",
- " \n",
- " def __len__(self):\n",
- " return len(self.memory)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now, let's define our model. But first, let quickly recap what a DQN is.\n",
- "\n",
- "### DQN algorithm\n",
- "\n",
- "Our environment is deterministic, so all equations presented here are also formulated deterministically for the sake of simplicity. In the reinforcement learning literature, they would also contain expectations over stochastic transitions in the environment.\n",
- "\n",
- "Our aim will be to train a policy that tries to maximize the discounted, cumulative reward $R_{t_0} = \\sum_{t=t_0}^{\\infty} \\gamma^{t - t_0} r_t$, where $R_{t_0}$ is also known as the *return*. The discount, $\\gamma$, should be a constant between $0$ and $1$ that ensures the sum converges. It makes rewards from the uncertain far future less important for our agent than the ones in the near future that it can be fairly confident about.\n",
- "\n",
- "The main idea behind Q-learning is that if we had a function $Q^*: State \\times Action \\rightarrow \\mathbb{R}$, that could tell us what our return would be, if we were to take an action in a given state, then we could easily construct a policy that maximizes our rewards:\n",
- "\n",
- "$$\\pi^*(s) = \\arg\\!\\max_a \\ Q^*(s, a)$$\n",
- "\n",
- "However, we don't know everything about the world, so we don't have access to $Q^*$. But, since neural networks are universal function approximators, we can simply create one and train it to resemble $Q^*$.\n",
- "\n",
- "For our training update rule, we'll use a fact that every $Q$ function for some policy obeys the Bellman equation:\n",
- "\n",
- "$$Q^{\\pi}(s, a) = r + \\gamma Q^{\\pi}(s', \\pi(s'))$$\n",
- "\n",
- "The difference between the two sides of the equality is known as the temporal difference error, $\\delta$:\n",
- "\n",
- "$$\\delta = Q(s, a) - (r + \\gamma \\max_a Q(s', a))$$\n",
- "\n",
- "To minimise this error, we will use the [Huber loss](https://en.wikipedia.org/wiki/Huber_loss). The Huber loss acts like the mean squared error when the error is small, but like the mean absolute error when the error is large - this makes it more robust to outliers when the estimates of $Q$ are very noisy. We calculate this over a batch of transitions, $B$, sampled from the replay memory:\n",
- "\n",
- "$$\\mathcal{L} = \\frac{1}{|B|}\\sum_{(s, a, s', r) \\ \\in \\ B} \\mathcal{L}(\\delta) \\quad \\text{where} \\quad \\mathcal{L}(\\delta) = \\begin{cases}\n",
- " \\frac{1}{2}{\\delta^2} & \\text{for } |\\delta| \\le 1, \\\\\n",
- " |\\delta| - \\frac{1}{2} & \\text{otherwise.}\n",
- "\\end{cases}$$\n",
- "\n",
- "### Q-network\n",
- "\n",
- "Our model will be a convolutional neural network that takes in the difference between the current and previous screen patches. It has two outputs, representing $Q(s, \\mathrm{left})$ and $Q(s, \\mathrm{right})$ (where $s$ is the input to the network). In effect, the network is trying to predict the *quality* of taking each action given the current input."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "class DQN(nn.Module):\n",
- " def __init__(self):\n",
- " super(DQN, self).__init__()\n",
- " self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)\n",
- " self.bn1 = nn.BatchNorm2d(16)\n",
- " self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)\n",
- " self.bn2 = nn.BatchNorm2d(32)\n",
- " self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)\n",
- " self.bn3 = nn.BatchNorm2d(32)\n",
- " self.head = nn.Linear(448, 2)\n",
- " \n",
- " def forward(self, x):\n",
- " x = F.relu(self.bn1(self.conv1(x)))\n",
- " x = F.relu(self.bn2(self.conv2(x)))\n",
- " x = F.relu(self.bn3(self.conv3(x)))\n",
- " return self.head(x.view(x.size(0), -1))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Input extraction\n",
- "\n",
- "The code below are utilities for extracting and processing rendered images from the environment. It uses the `torchvision` package, which makes it easy to compose image transforms. Once you run the cell it will display an example patch that it extracted."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false,
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "resize = T.Compose([T.ToPILImage(), T.Scale(40, interpolation=Image.CUBIC), T.ToTensor()])\n",
- "\n",
- "# This is based on the code from gym.\n",
- "screen_width = 600\n",
- "def get_cart_location():\n",
- " world_width = env.x_threshold * 2\n",
- " scale = screen_width / world_width\n",
- " return int(env.state[0] * scale + screen_width / 2.0) # MIDDLE OF CART\n",
- " \n",
- " \n",
- "def get_screen():\n",
- " screen = env.render(mode='rgb_array').transpose((2, 0, 1)) # transpose into torch order (CHW)\n",
- " # Strip off the top and bottom of the screen\n",
- " screen = screen[:, 160:320]\n",
- " view_width = 320\n",
- " cart_location = get_cart_location()\n",
- " if cart_location < view_width // 2:\n",
- " slice_range = slice(view_width)\n",
- " elif cart_location > (screen_width - view_width // 2):\n",
- " slice_range = slice(-view_width,None)\n",
- " else:\n",
- " slice_range = slice(cart_location - view_width // 2, cart_location + view_width // 2)\n",
- " # Strip off the edges, so that we have a square image centered on a cart\n",
- " screen = screen[:, :, slice_range]\n",
- " # Convert to float, rescare, convert to torch tensor (this doesn't require a copy)\n",
- " screen = np.ascontiguousarray(screen, dtype=np.float32) / 255\n",
- " screen = torch.from_numpy(screen)\n",
- " # Resize, and add a batch dimension (BCHW)\n",
- " return resize(screen).unsqueeze(0)\n",
- "\n",
- "env.reset()\n",
- "plt.imshow(get_screen().squeeze(0).permute(1, 2, 0).numpy(), interpolation='none')\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Training hyperparameters and utilities\n",
- "\n",
- "This cell instantiates our model and its optimizer, and defines some utilities:\n",
- "\n",
- "* `Variable` - this is a simple wrapper around `torch.autograd.Variable` that will automatically send the data to the GPU every time we construct a Variable.\n",
- "* `select_action` - will select an action accordingly to an epsilon greedy policy. Simply put, we'll sometimes use our model for choosing the action, and sometimes we'll just sample one uniformly. The probability of choosing a random action will start at `EPS_START` and will decay exponentially towards `EPS_END`. `EPS_DECAY` controls the rate of the decay.\n",
- "* `plot_durations` - a helper for plotting the durations of episodes, along with an average over the last 100 episodes (the measure used in the official evaluations). The plot will be underneath the cell containing the main training loop, and will update after every episode."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "BATCH_SIZE = 128\n",
- "GAMMA = 0.999\n",
- "EPS_START = 0.9\n",
- "EPS_END = 0.05\n",
- "EPS_DECAY = 200\n",
- "USE_CUDA = torch.cuda.is_available()\n",
- "dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor\n",
- "\n",
- "model = DQN()\n",
- "memory = ReplayMemory(10000)\n",
- "optimizer = optim.RMSprop(model.parameters())\n",
- "\n",
- "model.type(dtype)\n",
- "\n",
- "\n",
- "steps_done = 0\n",
- "def select_action(state):\n",
- " global steps_done\n",
- " sample = random.random()\n",
- " eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)\n",
- " steps_done += 1\n",
- " if sample > eps_threshold:\n",
- " return model(Variable(state.type(dtype), volatile=True)).data.max(1)[1].cpu()\n",
- " else:\n",
- " return torch.LongTensor([[random.randrange(2)]])\n",
- " \n",
- "\n",
- "episode_durations = []\n",
- "def plot_durations():\n",
- " plt.figure(1)\n",
- " plt.clf()\n",
- " durations_t = torch.Tensor(episode_durations)\n",
- " plt.plot(durations_t.numpy())\n",
- " # Take 100 episode averages and plot them too\n",
- " if len(durations_t) >= 100:\n",
- " means = durations_t.unfold(0, 100, 1).mean(1).view(-1)\n",
- " means = torch.cat((torch.zeros(99), means))\n",
- " plt.plot(means.numpy())\n",
- " display.clear_output(wait=True)\n",
- " display.display(plt.gcf())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Training loop\n",
- "\n",
- "Finally, the code for training our model.\n",
- "\n",
- "At the top you can find an `optimize_model` function that performs a single step of the optimization. It first samples a batch, concatenates all the tensors into a single one, computes $Q(s_t, a_t)$ and $V(s_{t+1}) = \\max_a Q(s_{t+1}, a)$, and combines them into our loss. By defition we set $V(s) = 0$ if $s$ is a terminal state.\n",
- "\n",
- "Below, you can find the main training loop. At the beginning we reset the environment and initialize the `state` variable. Then, we sample an action, execute it, observe the next screen and the reward (always 1), and optimize our model once. When the episode ends (our model fails), we restart the loop."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false,
- "scrolled": false
- },
- "outputs": [],
- "source": [
- "last_sync = 0\n",
- "def optimize_model():\n",
- " global last_sync\n",
- " if len(memory) < BATCH_SIZE:\n",
- " return\n",
- " transitions = memory.sample(BATCH_SIZE)\n",
- " # Transpose the batch (see http://stackoverflow.com/a/19343/3343043 for detailed explanation).\n",
- " batch = Transition(*zip(*transitions)) \n",
- "\n",
- " # Compute a mask of non-final states and concatenate the batch elements\n",
- " non_final_mask = torch.ByteTensor(tuple(map(lambda s: s is not None, batch.next_state)))\n",
- " # We don't want to backprop through the expected action values and volatile will save us\n",
- " # on temporarily changing the model parameters' requires_grad to False!\n",
- " non_final_next_states_t = torch.cat(tuple(s for s in batch.next_state if s is not None)).type(dtype)\n",
- " non_final_next_states = Variable(non_final_next_states_t, volatile=True)\n",
- " state_batch = Variable(torch.cat(batch.state))\n",
- " action_batch = Variable(torch.cat(batch.action))\n",
- " reward_batch = Variable(torch.cat(batch.reward))\n",
- "\n",
- " if USE_CUDA:\n",
- " state_batch = state_batch.cuda()\n",
- " action_batch = action_batch.cuda()\n",
- "\n",
- " # Compute Q(s_t, a) - the model computes Q(s_t), then we select the columns of actions taken\n",
- " state_action_values = model(state_batch).gather(1, action_batch).cpu()\n",
- "\n",
- " # Compute V(s_{t+1}) for all next states.\n",
- " next_state_values = Variable(torch.zeros(BATCH_SIZE))\n",
- " next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0].cpu()\n",
- " # Now, we don't want to mess up the loss with a volatile flag, so let's clear it.\n",
- " # After this, we'll just end up with a Variable that has requires_grad=False\n",
- " next_state_values.volatile = False\n",
- " # Compute the expected Q values\n",
- " expected_state_action_values = (next_state_values * GAMMA) + reward_batch\n",
- "\n",
- " # Compute Huber loss\n",
- " loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)\n",
- " \n",
- " # Optimize the model\n",
- " optimizer.zero_grad()\n",
- " loss.backward()\n",
- " for param in model.parameters():\n",
- " param.grad.data.clamp_(-1, 1)\n",
- " optimizer.step()\n",
- "\n",
- "\n",
- "for i_episode in count(1):\n",
- " # Initialize the environment and state\n",
- " env.reset()\n",
- " last_screen = get_screen()\n",
- " current_screen = get_screen()\n",
- " state = current_screen - last_screen\n",
- " for t in count():\n",
- " # Select and perform an action\n",
- " action = select_action(state)\n",
- " _, reward, done, _ = env.step(action[0,0])\n",
- " reward = torch.Tensor([reward])\n",
- " \n",
- " # Observe new state\n",
- " last_screen = current_screen\n",
- " current_screen = get_screen()\n",
- " if not done:\n",
- " next_state = current_screen - last_screen\n",
- " else:\n",
- " next_state = None\n",
- " \n",
- " # Store the transition in memory\n",
- " memory.push(state, action, next_state, reward)\n",
- " \n",
- " # Move to the next state\n",
- " state = next_state\n",
- " \n",
- " # Perform one step of the optimization (on the target network)\n",
- " optimize_model()\n",
- " \n",
- " if done:\n",
- " episode_durations.append(t+1)\n",
- " plot_durations()\n",
- " break"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/_static/ajax-loader.gif b/_static/ajax-loader.gif
new file mode 100755
index 00000000000..61faf8cab23
Binary files /dev/null and b/_static/ajax-loader.gif differ
diff --git a/_static/basic.css b/_static/basic.css
new file mode 100755
index 00000000000..19ced1057ae
--- /dev/null
+++ b/_static/basic.css
@@ -0,0 +1,665 @@
+/*
+ * basic.css
+ * ~~~~~~~~~
+ *
+ * Sphinx stylesheet -- basic theme.
+ *
+ * :copyright: Copyright 2007-2018 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+/* -- main layout ----------------------------------------------------------- */
+
+div.clearer {
+ clear: both;
+}
+
+/* -- relbar ---------------------------------------------------------------- */
+
+div.related {
+ width: 100%;
+ font-size: 90%;
+}
+
+div.related h3 {
+ display: none;
+}
+
+div.related ul {
+ margin: 0;
+ padding: 0 0 0 10px;
+ list-style: none;
+}
+
+div.related li {
+ display: inline;
+}
+
+div.related li.right {
+ float: right;
+ margin-right: 5px;
+}
+
+/* -- sidebar --------------------------------------------------------------- */
+
+div.sphinxsidebarwrapper {
+ padding: 10px 5px 0 10px;
+}
+
+div.sphinxsidebar {
+ float: left;
+ width: 230px;
+ margin-left: -100%;
+ font-size: 90%;
+ word-wrap: break-word;
+ overflow-wrap : break-word;
+}
+
+div.sphinxsidebar ul {
+ list-style: none;
+}
+
+div.sphinxsidebar ul ul,
+div.sphinxsidebar ul.want-points {
+ margin-left: 20px;
+ list-style: square;
+}
+
+div.sphinxsidebar ul ul {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+
+div.sphinxsidebar form {
+ margin-top: 10px;
+}
+
+div.sphinxsidebar input {
+ border: 1px solid #98dbcc;
+ font-family: sans-serif;
+ font-size: 1em;
+}
+
+div.sphinxsidebar #searchbox input[type="text"] {
+ float: left;
+ width: 80%;
+ padding: 0.25em;
+ box-sizing: border-box;
+}
+
+div.sphinxsidebar #searchbox input[type="submit"] {
+ float: left;
+ width: 20%;
+ border-left: none;
+ padding: 0.25em;
+ box-sizing: border-box;
+}
+
+
+img {
+ border: 0;
+ max-width: 100%;
+}
+
+/* -- search page ----------------------------------------------------------- */
+
+ul.search {
+ margin: 10px 0 0 20px;
+ padding: 0;
+}
+
+ul.search li {
+ padding: 5px 0 5px 20px;
+ background-image: url(file.png);
+ background-repeat: no-repeat;
+ background-position: 0 7px;
+}
+
+ul.search li a {
+ font-weight: bold;
+}
+
+ul.search li div.context {
+ color: #888;
+ margin: 2px 0 0 30px;
+ text-align: left;
+}
+
+ul.keywordmatches li.goodmatch a {
+ font-weight: bold;
+}
+
+/* -- index page ------------------------------------------------------------ */
+
+table.contentstable {
+ width: 90%;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+table.contentstable p.biglink {
+ line-height: 150%;
+}
+
+a.biglink {
+ font-size: 1.3em;
+}
+
+span.linkdescr {
+ font-style: italic;
+ padding-top: 5px;
+ font-size: 90%;
+}
+
+/* -- general index --------------------------------------------------------- */
+
+table.indextable {
+ width: 100%;
+}
+
+table.indextable td {
+ text-align: left;
+ vertical-align: top;
+}
+
+table.indextable ul {
+ margin-top: 0;
+ margin-bottom: 0;
+ list-style-type: none;
+}
+
+table.indextable > tbody > tr > td > ul {
+ padding-left: 0em;
+}
+
+table.indextable tr.pcap {
+ height: 10px;
+}
+
+table.indextable tr.cap {
+ margin-top: 10px;
+ background-color: #f2f2f2;
+}
+
+img.toggler {
+ margin-right: 3px;
+ margin-top: 3px;
+ cursor: pointer;
+}
+
+div.modindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+div.genindex-jumpbox {
+ border-top: 1px solid #ddd;
+ border-bottom: 1px solid #ddd;
+ margin: 1em 0 1em 0;
+ padding: 0.4em;
+}
+
+/* -- domain module index --------------------------------------------------- */
+
+table.modindextable td {
+ padding: 2px;
+ border-collapse: collapse;
+}
+
+/* -- general body styles --------------------------------------------------- */
+
+div.body {
+ min-width: 450px;
+ max-width: 800px;
+}
+
+div.body p, div.body dd, div.body li, div.body blockquote {
+ -moz-hyphens: auto;
+ -ms-hyphens: auto;
+ -webkit-hyphens: auto;
+ hyphens: auto;
+}
+
+a.headerlink {
+ visibility: hidden;
+}
+
+h1:hover > a.headerlink,
+h2:hover > a.headerlink,
+h3:hover > a.headerlink,
+h4:hover > a.headerlink,
+h5:hover > a.headerlink,
+h6:hover > a.headerlink,
+dt:hover > a.headerlink,
+caption:hover > a.headerlink,
+p.caption:hover > a.headerlink,
+div.code-block-caption:hover > a.headerlink {
+ visibility: visible;
+}
+
+div.body p.caption {
+ text-align: inherit;
+}
+
+div.body td {
+ text-align: left;
+}
+
+.first {
+ margin-top: 0 !important;
+}
+
+p.rubric {
+ margin-top: 30px;
+ font-weight: bold;
+}
+
+img.align-left, .figure.align-left, object.align-left {
+ clear: left;
+ float: left;
+ margin-right: 1em;
+}
+
+img.align-right, .figure.align-right, object.align-right {
+ clear: right;
+ float: right;
+ margin-left: 1em;
+}
+
+img.align-center, .figure.align-center, object.align-center {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.align-left {
+ text-align: left;
+}
+
+.align-center {
+ text-align: center;
+}
+
+.align-right {
+ text-align: right;
+}
+
+/* -- sidebars -------------------------------------------------------------- */
+
+div.sidebar {
+ margin: 0 0 0.5em 1em;
+ border: 1px solid #ddb;
+ padding: 7px 7px 0 7px;
+ background-color: #ffe;
+ width: 40%;
+ float: right;
+}
+
+p.sidebar-title {
+ font-weight: bold;
+}
+
+/* -- topics ---------------------------------------------------------------- */
+
+div.topic {
+ border: 1px solid #ccc;
+ padding: 7px 7px 0 7px;
+ margin: 10px 0 10px 0;
+}
+
+p.topic-title {
+ font-size: 1.1em;
+ font-weight: bold;
+ margin-top: 10px;
+}
+
+/* -- admonitions ----------------------------------------------------------- */
+
+div.admonition {
+ margin-top: 10px;
+ margin-bottom: 10px;
+ padding: 7px;
+}
+
+div.admonition dt {
+ font-weight: bold;
+}
+
+div.admonition dl {
+ margin-bottom: 0;
+}
+
+p.admonition-title {
+ margin: 0px 10px 5px 0px;
+ font-weight: bold;
+}
+
+div.body p.centered {
+ text-align: center;
+ margin-top: 25px;
+}
+
+/* -- tables ---------------------------------------------------------------- */
+
+table.docutils {
+ border: 0;
+ border-collapse: collapse;
+}
+
+table.align-center {
+ margin-left: auto;
+ margin-right: auto;
+}
+
+table caption span.caption-number {
+ font-style: italic;
+}
+
+table caption span.caption-text {
+}
+
+table.docutils td, table.docutils th {
+ padding: 1px 8px 1px 5px;
+ border-top: 0;
+ border-left: 0;
+ border-right: 0;
+ border-bottom: 1px solid #aaa;
+}
+
+table.footnote td, table.footnote th {
+ border: 0 !important;
+}
+
+th {
+ text-align: left;
+ padding-right: 5px;
+}
+
+table.citation {
+ border-left: solid 1px gray;
+ margin-left: 1px;
+}
+
+table.citation td {
+ border-bottom: none;
+}
+
+/* -- figures --------------------------------------------------------------- */
+
+div.figure {
+ margin: 0.5em;
+ padding: 0.5em;
+}
+
+div.figure p.caption {
+ padding: 0.3em;
+}
+
+div.figure p.caption span.caption-number {
+ font-style: italic;
+}
+
+div.figure p.caption span.caption-text {
+}
+
+/* -- field list styles ----------------------------------------------------- */
+
+table.field-list td, table.field-list th {
+ border: 0 !important;
+}
+
+.field-list ul {
+ margin: 0;
+ padding-left: 1em;
+}
+
+.field-list p {
+ margin: 0;
+}
+
+.field-name {
+ -moz-hyphens: manual;
+ -ms-hyphens: manual;
+ -webkit-hyphens: manual;
+ hyphens: manual;
+}
+
+/* -- other body styles ----------------------------------------------------- */
+
+ol.arabic {
+ list-style: decimal;
+}
+
+ol.loweralpha {
+ list-style: lower-alpha;
+}
+
+ol.upperalpha {
+ list-style: upper-alpha;
+}
+
+ol.lowerroman {
+ list-style: lower-roman;
+}
+
+ol.upperroman {
+ list-style: upper-roman;
+}
+
+dl {
+ margin-bottom: 15px;
+}
+
+dd p {
+ margin-top: 0px;
+}
+
+dd ul, dd table {
+ margin-bottom: 10px;
+}
+
+dd {
+ margin-top: 3px;
+ margin-bottom: 10px;
+ margin-left: 30px;
+}
+
+dt:target, span.highlighted {
+ background-color: #fbe54e;
+}
+
+rect.highlighted {
+ fill: #fbe54e;
+}
+
+dl.glossary dt {
+ font-weight: bold;
+ font-size: 1.1em;
+}
+
+.optional {
+ font-size: 1.3em;
+}
+
+.sig-paren {
+ font-size: larger;
+}
+
+.versionmodified {
+ font-style: italic;
+}
+
+.system-message {
+ background-color: #fda;
+ padding: 5px;
+ border: 3px solid red;
+}
+
+.footnote:target {
+ background-color: #ffa;
+}
+
+.line-block {
+ display: block;
+ margin-top: 1em;
+ margin-bottom: 1em;
+}
+
+.line-block .line-block {
+ margin-top: 0;
+ margin-bottom: 0;
+ margin-left: 1.5em;
+}
+
+.guilabel, .menuselection {
+ font-family: sans-serif;
+}
+
+.accelerator {
+ text-decoration: underline;
+}
+
+.classifier {
+ font-style: oblique;
+}
+
+abbr, acronym {
+ border-bottom: dotted 1px;
+ cursor: help;
+}
+
+/* -- code displays --------------------------------------------------------- */
+
+pre {
+ overflow: auto;
+ overflow-y: hidden; /* fixes display issues on Chrome browsers */
+}
+
+span.pre {
+ -moz-hyphens: none;
+ -ms-hyphens: none;
+ -webkit-hyphens: none;
+ hyphens: none;
+}
+
+td.linenos pre {
+ padding: 5px 0px;
+ border: 0;
+ background-color: transparent;
+ color: #aaa;
+}
+
+table.highlighttable {
+ margin-left: 0.5em;
+}
+
+table.highlighttable td {
+ padding: 0 0.5em 0 0.5em;
+}
+
+div.code-block-caption {
+ padding: 2px 5px;
+ font-size: small;
+}
+
+div.code-block-caption code {
+ background-color: transparent;
+}
+
+div.code-block-caption + div > div.highlight > pre {
+ margin-top: 0;
+}
+
+div.code-block-caption span.caption-number {
+ padding: 0.1em 0.3em;
+ font-style: italic;
+}
+
+div.code-block-caption span.caption-text {
+}
+
+div.literal-block-wrapper {
+ padding: 1em 1em 0;
+}
+
+div.literal-block-wrapper div.highlight {
+ margin: 0;
+}
+
+code.descname {
+ background-color: transparent;
+ font-weight: bold;
+ font-size: 1.2em;
+}
+
+code.descclassname {
+ background-color: transparent;
+}
+
+code.xref, a code {
+ background-color: transparent;
+ font-weight: bold;
+}
+
+h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
+ background-color: transparent;
+}
+
+.viewcode-link {
+ float: right;
+}
+
+.viewcode-back {
+ float: right;
+ font-family: sans-serif;
+}
+
+div.viewcode-block:target {
+ margin: -1px -10px;
+ padding: 0 10px;
+}
+
+/* -- math display ---------------------------------------------------------- */
+
+img.math {
+ vertical-align: middle;
+}
+
+div.body div.math p {
+ text-align: center;
+}
+
+span.eqno {
+ float: right;
+}
+
+span.eqno a.headerlink {
+ position: relative;
+ left: 0px;
+ z-index: 1;
+}
+
+div.math:hover a.headerlink {
+ visibility: visible;
+}
+
+/* -- printout stylesheet --------------------------------------------------- */
+
+@media print {
+ div.document,
+ div.documentwrapper,
+ div.bodywrapper {
+ margin: 0 !important;
+ width: 100%;
+ }
+
+ div.sphinxsidebar,
+ div.related,
+ div.footer,
+ #top-link {
+ display: none;
+ }
+}
\ No newline at end of file
diff --git a/_static/broken_example.png b/_static/broken_example.png
new file mode 100755
index 00000000000..4fea24e7df4
Binary files /dev/null and b/_static/broken_example.png differ
diff --git a/_static/comment-bright.png b/_static/comment-bright.png
new file mode 100755
index 00000000000..15e27edb12a
Binary files /dev/null and b/_static/comment-bright.png differ
diff --git a/_static/comment-close.png b/_static/comment-close.png
new file mode 100755
index 00000000000..4d91bcf57de
Binary files /dev/null and b/_static/comment-close.png differ
diff --git a/_static/comment.png b/_static/comment.png
new file mode 100755
index 00000000000..dfbc0cbd512
Binary files /dev/null and b/_static/comment.png differ
diff --git a/_static/css/custom.css b/_static/css/custom.css
new file mode 100755
index 00000000000..a0882c1d4fc
--- /dev/null
+++ b/_static/css/custom.css
@@ -0,0 +1,97 @@
+/* sphinx-design styles for cards/tabs
+*/
+
+:root {
+ --sd-color-info: #ee4c2c;
+ --sd-color-primary: #6c6c6d;
+ --sd-color-primary-highlight: #f3f4f7;
+ --sd-color-card-border-hover: #ee4c2c;
+ --sd-color-card-border: #f3f4f7;
+ --sd-color-card-background: #fff;
+ --sd-color-card-text: inherit;
+ --sd-color-card-header: transparent;
+ --sd-color-card-footer: transparent;
+ --sd-color-tabs-label-active: hsla(231, 99%, 66%, 1);
+ --sd-color-tabs-label-hover: hsla(231, 99%, 66%, 1);
+ --sd-color-tabs-label-inactive: hsl(0, 0%, 66%);
+ --sd-color-tabs-underline-active: hsla(231, 99%, 66%, 1);
+ --sd-color-tabs-underline-hover: rgba(178, 206, 245, 0.62);
+ --sd-color-tabs-underline-inactive: transparent;
+ --sd-color-tabs-overline: rgb(222, 222, 222);
+ --sd-color-tabs-underline: rgb(222, 222, 222);
+}
+
+.sd-text-info {
+ color: #ee4c2c;
+}
+
+
+.sd-card {
+ position: relative;
+ background-color: #fff;
+ opacity: 1.0;
+ border-radius: 0px;
+ width: 30%;
+ border: none;
+ padding-bottom: 0px;
+}
+
+
+.sd-card-img {
+ opacity: 0.5;
+ width: 200px;
+ padding: 0px;
+}
+
+.sd-card-img:hover {
+ opacity: 1.0;
+ background-color: #f3f4f7;
+}
+
+
+.sd-card:after {
+ display: block;
+ opacity: 1;
+ content: '';
+ border-bottom: solid 1px #ee4c2c;
+ background-color: #fff;
+ transform: scaleX(0);
+ transition: transform .250s ease-in-out;
+ transform-origin: 0% 50%;
+}
+
+.sd-card:hover {
+ background-color: #fff;
+ opacity: 1;
+ border-top: 1px solid #f3f4f7;
+ border-left: 1px solid #f3f4f7;
+ border-right: 1px solid #f3f4f7;
+}
+
+.sd-card:hover:after {
+ transform: scaleX(1);
+}
+
+.card-prerequisites:hover {
+ transition: none;
+ border: none;
+}
+
+.card-prerequisites:hover:after {
+ transition: none;
+ transform: none;
+}
+
+.card-prerequisites:after {
+ display: block;
+ content: '';
+ border-bottom: none;
+ background-color: #fff;
+ transform: none;
+ transition: none;
+ transform-origin: none;
+}
+
+.pytorch-left-menu-search input[type=text] {
+ background-image: url("../images/search-icon.svg");
+}
diff --git a/_static/css/custom2.css b/_static/css/custom2.css
new file mode 100644
index 00000000000..a24ee796872
--- /dev/null
+++ b/_static/css/custom2.css
@@ -0,0 +1,112 @@
+/* Survey banner .css */
+
+.survey-banner {
+ margin-top: 10px;
+ background-color: #f3f4f7;
+ padding-top: 15px;
+ padding-left: 10px;
+ padding-bottom: 1px;
+}
+
+@media screen and (max-width: 600px) {
+ .survey-banner {
+ padding-top: 5px;
+ padding-left: 5px;
+ padding-bottom: -1px;
+ font-size: 12px;
+ margin-bottom: 5px;
+ }
+}
+
+/* Left nav for 2nd level nav */
+
+.pytorch-left-menu li.toctree-l2 {
+ padding-left: 10px;
+}
+
+.pytorch-left-menu li.toctree-l2.current > a {
+ color: #ee4c2c;
+}
+
+.pytorch-left-menu li.toctree-l2.current a:link.reference.internal {
+ color: #ee4c2c;
+}
+
+.pytorch-left-menu li.toctree-l1.current > a:before {
+ content: "";
+}
+
+/* search radio button*/
+
+input[type="radio"] {
+ accent-color: #ee4c2c;
+}
+
+.gsst_b {
+ display: none;
+}
+
+#gsc-i-id1 {
+ height: 1.5rem;
+ text-indent: 12px !important;
+ font-size: 1rem !important;
+ font-family: "FreightSansi";
+ background-image: url(../images/search-icon.svg) !important;
+ background-repeat: no-repeat !important;
+ background-size: 18px 18px !important;
+ background-position: 5px 0px !important;
+ padding-left: 20px !important;
+}
+
+#gsc-i-id1::placeholder {
+ font-family: 'FreightSans';
+ font-size: 1rem;
+ color: #262626;
+}
+
+.gsc-control-cse {
+ padding: 0 !important;
+ border-radius: 0px !important;
+ border: none !important;
+}
+
+.gsc-overflow-hidden {
+ overflow: visible !important;
+}
+
+#___gcse_0 {
+ height: 44px !important;
+ padding: 0 !important;
+}
+
+table.gsc-search-box td.gsc-input {
+ padding-right: 0 !important;
+}
+
+table.gsc-search-box td {
+ height: 44px;
+ margin-bottom: 0 !important;
+ padding-bottom: 0 !important;
+}
+
+.gsc-search-button-v2 {
+ display: none;
+}
+
+.gs_id50 {
+ width: 308px;
+}
+
+.gsib_a {
+ padding: 0px 8px 4px 9px !important;
+}
+
+.gsc-input-box {
+ border-radius: 0px !important;
+ border: none !important;
+}
+
+form.gsc-search-box {
+ margin-bottom: 0px;
+}
+
diff --git a/_static/css/pytorch_theme.css b/_static/css/pytorch_theme.css
new file mode 100755
index 00000000000..153f4889c08
--- /dev/null
+++ b/_static/css/pytorch_theme.css
@@ -0,0 +1,127 @@
+body {
+ font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
+}
+
+/* Default header fonts are ugly */
+h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
+ font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
+}
+
+/* Use white for docs background */
+.wy-side-nav-search {
+ background-color: #fff;
+}
+
+.wy-nav-content-wrap, .wy-menu li.current > a {
+ background-color: #fff;
+}
+
+@media screen and (min-width: 1400px) {
+ .wy-nav-content-wrap {
+ background-color: rgba(0, 0, 0, 0.0470588);
+ }
+
+ .wy-nav-content {
+ background-color: #fff;
+ }
+}
+
+/* Fixes for mobile */
+.wy-nav-top {
+ background-color: #fff;
+ background-image: url('../img/pytorch-logo-dark.svg');
+ background-repeat: no-repeat;
+ background-position: center;
+ padding: 0;
+ margin: 0.4045em 0.809em;
+ color: #333;
+}
+
+.wy-nav-top > a {
+ display: none;
+}
+
+@media screen and (max-width: 768px) {
+ .wy-side-nav-search>a img.logo {
+ height: 60px;
+ }
+}
+
+/* This is needed to ensure that logo above search scales properly */
+.wy-side-nav-search a {
+ display: block;
+}
+
+/* This ensures that multiple constructors will remain in separate lines. */
+.rst-content dl:not(.docutils) dt {
+ display: table;
+}
+
+/* Use our red for literals (it's very similar to the original color) */
+.rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal {
+ color: #F05732;
+}
+
+.rst-content tt.xref, a .rst-content tt, .rst-content tt.xref,
+.rst-content code.xref, a .rst-content tt, a .rst-content code {
+ color: #404040;
+}
+
+/* Change link colors (except for the menu) */
+
+a {
+ color: #F05732;
+}
+
+a:hover {
+ color: #F05732;
+}
+
+
+a:visited {
+ color: #D44D2C;
+}
+
+.wy-menu a {
+ color: #b3b3b3;
+}
+
+.wy-menu a:hover {
+ color: #b3b3b3;
+}
+
+a.icon.icon-home {
+ color: #D44D2C;
+}
+
+.version{
+ color: #D44D2C !important;
+}
+
+/* Default footer text is quite big */
+footer {
+ font-size: 80%;
+}
+
+footer .rst-footer-buttons {
+ font-size: 125%; /* revert footer settings - 1/80% = 125% */
+}
+
+footer p {
+ font-size: 100%;
+}
+
+/* For hidden headers that appear in TOC tree */
+/* see https://stackoverflow.com/a/32363545/3343043 */
+.rst-content .hidden-section {
+ display: none;
+}
+
+nav .hidden-section {
+ display: inherit;
+}
+
+/* Make code blocks have a background */
+.codeblock,pre.literal-block,.rst-content .literal-block,.rst-content pre.literal-block,div[class^='highlight'] {
+ background: rgba(0, 0, 0, 0.0470588);
+}
diff --git a/_static/down-pressed.png b/_static/down-pressed.png
new file mode 100755
index 00000000000..5756c8cad88
Binary files /dev/null and b/_static/down-pressed.png differ
diff --git a/_static/down.png b/_static/down.png
new file mode 100755
index 00000000000..1b3bdad2cef
Binary files /dev/null and b/_static/down.png differ
diff --git a/_static/file.png b/_static/file.png
new file mode 100755
index 00000000000..a858a410e4f
Binary files /dev/null and b/_static/file.png differ
diff --git a/_static/fonts/FreightSans/freight-sans-light.woff b/_static/fonts/FreightSans/freight-sans-light.woff
new file mode 100755
index 00000000000..e67ed72a13a
Binary files /dev/null and b/_static/fonts/FreightSans/freight-sans-light.woff differ
diff --git a/_static/fonts/FreightSans/freight-sans-light.woff2 b/_static/fonts/FreightSans/freight-sans-light.woff2
new file mode 100755
index 00000000000..5a6b1890aeb
Binary files /dev/null and b/_static/fonts/FreightSans/freight-sans-light.woff2 differ
diff --git a/_static/fonts/FreightSans/freight-sans-regular.woff b/_static/fonts/FreightSans/freight-sans-regular.woff
new file mode 100755
index 00000000000..7e39281921d
Binary files /dev/null and b/_static/fonts/FreightSans/freight-sans-regular.woff differ
diff --git a/_static/fonts/FreightSans/freight-sans-regular.woff2 b/_static/fonts/FreightSans/freight-sans-regular.woff2
new file mode 100755
index 00000000000..e77a2cc2c5e
Binary files /dev/null and b/_static/fonts/FreightSans/freight-sans-regular.woff2 differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-Light.woff b/_static/fonts/IBMPlexMono/IBMPlexMono-Light.woff
new file mode 100755
index 00000000000..cf37a5c50bd
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-Light.woff differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-Light.woff2 b/_static/fonts/IBMPlexMono/IBMPlexMono-Light.woff2
new file mode 100755
index 00000000000..955a6eab5bb
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-Light.woff2 differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff b/_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff
new file mode 100755
index 00000000000..fc65a679c22
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2 b/_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2
new file mode 100755
index 00000000000..c352e40e34a
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2 differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-Regular.woff b/_static/fonts/IBMPlexMono/IBMPlexMono-Regular.woff
new file mode 100755
index 00000000000..7d63d89f24b
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-Regular.woff differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-Regular.woff2 b/_static/fonts/IBMPlexMono/IBMPlexMono-Regular.woff2
new file mode 100755
index 00000000000..d0d7ded9079
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-Regular.woff2 differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff b/_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff
new file mode 100755
index 00000000000..1da7753cf28
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff differ
diff --git a/_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2 b/_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2
new file mode 100755
index 00000000000..79dffdb85f7
Binary files /dev/null and b/_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2 differ
diff --git a/_static/gallery.css b/_static/gallery.css
new file mode 100755
index 00000000000..07caf0e7090
--- /dev/null
+++ b/_static/gallery.css
@@ -0,0 +1,189 @@
+/*
+Sphinx-Gallery has compatible CSS to fix default sphinx themes
+Tested for Sphinx 1.3.1 for all themes: default, alabaster, sphinxdoc,
+scrolls, agogo, traditional, nature, haiku, pyramid
+Tested for Read the Docs theme 0.1.7 */
+.sphx-glr-thumbcontainer {
+ background: #fff;
+ border: solid #fff 1px;
+ -moz-border-radius: 5px;
+ -webkit-border-radius: 5px;
+ border-radius: 5px;
+ box-shadow: none;
+ float: left;
+ margin: 5px;
+ min-height: 230px;
+ padding-top: 5px;
+ position: relative;
+}
+.sphx-glr-thumbcontainer:hover {
+ border: solid #b4ddfc 1px;
+ box-shadow: 0 0 15px rgba(142, 176, 202, 0.5);
+}
+.sphx-glr-thumbcontainer a.internal {
+ bottom: 0;
+ display: block;
+ left: 0;
+ padding: 150px 10px 0;
+ position: absolute;
+ right: 0;
+ top: 0;
+}
+/* Next one is to avoid Sphinx traditional theme to cover all the
+thumbnail with its default link Background color */
+.sphx-glr-thumbcontainer a.internal:hover {
+ background-color: transparent;
+}
+
+.sphx-glr-thumbcontainer p {
+ margin: 0 0 .1em 0;
+}
+.sphx-glr-thumbcontainer .figure {
+ margin: 10px;
+ width: 160px;
+}
+.sphx-glr-thumbcontainer img {
+ display: inline;
+ max-height: 160px;
+ width: 160px;
+}
+.sphx-glr-thumbcontainer[tooltip]:hover:after {
+ background: rgba(0, 0, 0, 0.8);
+ -webkit-border-radius: 5px;
+ -moz-border-radius: 5px;
+ border-radius: 5px;
+ color: #fff;
+ content: attr(tooltip);
+ left: 95%;
+ padding: 5px 15px;
+ position: absolute;
+ z-index: 98;
+ width: 220px;
+ bottom: 52%;
+}
+.sphx-glr-thumbcontainer[tooltip]:hover:before {
+ border: solid;
+ border-color: #333 transparent;
+ border-width: 18px 0 0 20px;
+ bottom: 58%;
+ content: '';
+ left: 85%;
+ position: absolute;
+ z-index: 99;
+}
+
+.highlight-pytb pre {
+ background-color: #ffe4e4;
+ border: 1px solid #f66;
+ margin-top: 10px;
+ padding: 7px;
+}
+
+.sphx-glr-script-out {
+ color: #888;
+ margin: 0;
+}
+.sphx-glr-script-out .highlight {
+ background-color: transparent;
+ margin-left: 2.5em;
+ margin-top: -1.4em;
+}
+.sphx-glr-script-out .highlight pre {
+ background-color: #fafae2;
+ border: 0;
+ max-height: 30em;
+ overflow: auto;
+ padding-left: 1ex;
+ margin: 0px;
+ word-break: break-word;
+}
+.sphx-glr-script-out + p {
+ margin-top: 1.8em;
+}
+blockquote.sphx-glr-script-out {
+ margin-left: 0pt;
+}
+
+div.sphx-glr-footer {
+ text-align: center;
+}
+
+div.binder-badge {
+ margin: 1em auto;
+ vertical-align: middle;
+}
+
+div.sphx-glr-download {
+ margin: 1em auto;
+ vertical-align: middle;
+}
+
+div.sphx-glr-download a {
+ background-color: #ffc;
+ background-image: linear-gradient(to bottom, #FFC, #d5d57e);
+ border-radius: 4px;
+ border: 1px solid #c2c22d;
+ color: #000;
+ display: inline-block;
+ font-weight: bold;
+ padding: 1ex;
+ text-align: center;
+}
+
+div.sphx-glr-download code.download {
+ display: inline-block;
+ white-space: normal;
+ word-break: normal;
+ overflow-wrap: break-word;
+ /* border and background are given by the enclosing 'a' */
+ border: none;
+ background: none;
+}
+
+div.sphx-glr-download a:hover {
+ box-shadow: inset 0 1px 0 rgba(255,255,255,.1), 0 1px 5px rgba(0,0,0,.25);
+ text-decoration: none;
+ background-image: none;
+ background-color: #d5d57e;
+}
+
+.sphx-glr-example-title > :target::before {
+ display: block;
+ content: "";
+ margin-top: -50px;
+ height: 50px;
+ visibility: hidden;
+}
+
+ul.sphx-glr-horizontal {
+ list-style: none;
+ padding: 0;
+}
+ul.sphx-glr-horizontal li {
+ display: inline;
+}
+ul.sphx-glr-horizontal img {
+ height: auto !important;
+}
+
+.sphx-glr-single-img {
+ margin: auto;
+ display: block;
+ max-width: 100%;
+}
+
+.sphx-glr-multi-img {
+ max-width: 42%;
+ height: auto;
+}
+
+p.sphx-glr-signature a.reference.external {
+ -moz-border-radius: 5px;
+ -webkit-border-radius: 5px;
+ border-radius: 5px;
+ padding: 3px;
+ font-size: 75%;
+ text-align: right;
+ margin-left: auto;
+ display: table;
+}
diff --git a/_static/imagenet_class_index.json b/_static/imagenet_class_index.json
new file mode 100644
index 00000000000..5fe0dfefcd3
--- /dev/null
+++ b/_static/imagenet_class_index.json
@@ -0,0 +1 @@
+{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], 
"100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", 
"Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": 
["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", 
"barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", 
"chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", 
"hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": 
["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": 
["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": 
["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]}
\ No newline at end of file
diff --git a/_static/images/arrow-down-orange.svg b/_static/images/arrow-down-orange.svg
new file mode 100755
index 00000000000..e9d8e9ecf24
--- /dev/null
+++ b/_static/images/arrow-down-orange.svg
@@ -0,0 +1,19 @@
+
+
\ No newline at end of file
diff --git a/_static/images/arrow-right-with-tail.svg b/_static/images/arrow-right-with-tail.svg
new file mode 100755
index 00000000000..5843588fca6
--- /dev/null
+++ b/_static/images/arrow-right-with-tail.svg
@@ -0,0 +1,19 @@
+
+
\ No newline at end of file
diff --git a/_static/images/chevron-down-grey.svg b/_static/images/chevron-down-grey.svg
new file mode 100755
index 00000000000..82d6514f250
--- /dev/null
+++ b/_static/images/chevron-down-grey.svg
@@ -0,0 +1,18 @@
+
+
+
+
diff --git a/_static/images/chevron-right-orange.svg b/_static/images/chevron-right-orange.svg
new file mode 100755
index 00000000000..7033fc93bf4
--- /dev/null
+++ b/_static/images/chevron-right-orange.svg
@@ -0,0 +1,17 @@
+
+
+
+
diff --git a/_static/images/chevron-right-white.svg b/_static/images/chevron-right-white.svg
new file mode 100755
index 00000000000..dd9e77f2616
--- /dev/null
+++ b/_static/images/chevron-right-white.svg
@@ -0,0 +1,17 @@
+
+
+
+
\ No newline at end of file
diff --git a/_static/images/home-footer-background.jpg b/_static/images/home-footer-background.jpg
new file mode 100755
index 00000000000..b307bb57f48
Binary files /dev/null and b/_static/images/home-footer-background.jpg differ
diff --git a/_static/images/icon-close.svg b/_static/images/icon-close.svg
new file mode 100755
index 00000000000..348964e79f7
--- /dev/null
+++ b/_static/images/icon-close.svg
@@ -0,0 +1,21 @@
+
+
\ No newline at end of file
diff --git a/_static/images/icon-menu-dots-dark.svg b/_static/images/icon-menu-dots-dark.svg
new file mode 100755
index 00000000000..fa2ad044b3f
--- /dev/null
+++ b/_static/images/icon-menu-dots-dark.svg
@@ -0,0 +1,42 @@
+
+
\ No newline at end of file
diff --git a/_static/images/logo-dark.svg b/_static/images/logo-dark.svg
new file mode 100755
index 00000000000..9b4c1a56ac6
--- /dev/null
+++ b/_static/images/logo-dark.svg
@@ -0,0 +1,30 @@
+
+
+
+
diff --git a/_static/images/logo-facebook-dark.svg b/_static/images/logo-facebook-dark.svg
new file mode 100755
index 00000000000..cff17915c4f
--- /dev/null
+++ b/_static/images/logo-facebook-dark.svg
@@ -0,0 +1,8 @@
+
+
+
+
diff --git a/_static/images/logo-icon.svg b/_static/images/logo-icon.svg
new file mode 100755
index 00000000000..575f6823e47
--- /dev/null
+++ b/_static/images/logo-icon.svg
@@ -0,0 +1,12 @@
+
+
+
+
diff --git a/_static/images/logo-twitter-dark.svg b/_static/images/logo-twitter-dark.svg
new file mode 100755
index 00000000000..1572570f88c
--- /dev/null
+++ b/_static/images/logo-twitter-dark.svg
@@ -0,0 +1,16 @@
+
+
+
+
diff --git a/_static/images/logo.svg b/_static/images/logo.svg
new file mode 100755
index 00000000000..f8d44b98425
--- /dev/null
+++ b/_static/images/logo.svg
@@ -0,0 +1,31 @@
+
+
+
+
diff --git a/_static/images/microsoft-logo.svg b/_static/images/microsoft-logo.svg
new file mode 100644
index 00000000000..a1a7ce2d7a7
--- /dev/null
+++ b/_static/images/microsoft-logo.svg
@@ -0,0 +1,80 @@
+
+
+
diff --git a/_static/images/view-page-source-icon.svg b/_static/images/view-page-source-icon.svg
new file mode 100755
index 00000000000..6f5bbe0748f
--- /dev/null
+++ b/_static/images/view-page-source-icon.svg
@@ -0,0 +1,13 @@
+
+
+
diff --git a/_static/img/8_workers.png b/_static/img/8_workers.png
new file mode 100644
index 00000000000..9a51182eb4b
Binary files /dev/null and b/_static/img/8_workers.png differ
diff --git a/_static/img/ExecuTorch-Logo-cropped.svg b/_static/img/ExecuTorch-Logo-cropped.svg
new file mode 100644
index 00000000000..9e0ef52fbd8
--- /dev/null
+++ b/_static/img/ExecuTorch-Logo-cropped.svg
@@ -0,0 +1,57 @@
+
+
diff --git a/_static/img/ONNXLive.png b/_static/img/ONNXLive.png
new file mode 100755
index 00000000000..74fd4c891aa
Binary files /dev/null and b/_static/img/ONNXLive.png differ
diff --git a/_static/img/SRResNet.png b/_static/img/SRResNet.png
new file mode 100755
index 00000000000..8e3070fc65e
Binary files /dev/null and b/_static/img/SRResNet.png differ
diff --git a/_static/img/Variable.png b/_static/img/Variable.png
new file mode 100755
index 00000000000..a4455d7500a
Binary files /dev/null and b/_static/img/Variable.png differ
diff --git a/_static/img/audio_preprocessing_tutorial_waveform.png b/_static/img/audio_preprocessing_tutorial_waveform.png
new file mode 100644
index 00000000000..320b53c996d
Binary files /dev/null and b/_static/img/audio_preprocessing_tutorial_waveform.png differ
diff --git a/_static/img/autodiff.png b/_static/img/autodiff.png
new file mode 100644
index 00000000000..a102bdeee4e
Binary files /dev/null and b/_static/img/autodiff.png differ
diff --git a/_static/img/ax_logo.png b/_static/img/ax_logo.png
new file mode 100644
index 00000000000..ecb4302b524
Binary files /dev/null and b/_static/img/ax_logo.png differ
diff --git a/_static/img/ax_scheduler_illustration.png b/_static/img/ax_scheduler_illustration.png
new file mode 100644
index 00000000000..65e5a004a1b
Binary files /dev/null and b/_static/img/ax_scheduler_illustration.png differ
diff --git a/_static/img/basics/comp-graph.png b/_static/img/basics/comp-graph.png
new file mode 100644
index 00000000000..cfa6163d58a
Binary files /dev/null and b/_static/img/basics/comp-graph.png differ
diff --git a/_static/img/basics/fashion_mnist.png b/_static/img/basics/fashion_mnist.png
new file mode 100644
index 00000000000..213b1e1f17b
Binary files /dev/null and b/_static/img/basics/fashion_mnist.png differ
diff --git a/_static/img/basics/optimizationloops.png b/_static/img/basics/optimizationloops.png
new file mode 100644
index 00000000000..c43d83f2799
Binary files /dev/null and b/_static/img/basics/optimizationloops.png differ
diff --git a/_static/img/basics/typesdata.png b/_static/img/basics/typesdata.png
new file mode 100644
index 00000000000..5d0e0291eef
Binary files /dev/null and b/_static/img/basics/typesdata.png differ
diff --git a/_static/img/bert_mrpc.png b/_static/img/bert_mrpc.png
new file mode 100644
index 00000000000..fb0ff796f79
Binary files /dev/null and b/_static/img/bert_mrpc.png differ
diff --git a/images/cartpole.gif b/_static/img/cartpole.gif
old mode 100644
new mode 100755
similarity index 100%
rename from images/cartpole.gif
rename to _static/img/cartpole.gif
diff --git a/_static/img/cat.jpg b/_static/img/cat.jpg
new file mode 100755
index 00000000000..ccb575c02c3
Binary files /dev/null and b/_static/img/cat.jpg differ
diff --git a/_static/img/channels_last_memory_format.png b/_static/img/channels_last_memory_format.png
new file mode 100644
index 00000000000..d2b2922023f
Binary files /dev/null and b/_static/img/channels_last_memory_format.png differ
diff --git a/_static/img/char_rnn_generation.png b/_static/img/char_rnn_generation.png
new file mode 100755
index 00000000000..9702e4e1d35
Binary files /dev/null and b/_static/img/char_rnn_generation.png differ
diff --git a/_static/img/chat.png b/_static/img/chat.png
new file mode 100755
index 00000000000..02457c0697f
Binary files /dev/null and b/_static/img/chat.png differ
diff --git a/_static/img/chatbot/RNN-bidirectional.png b/_static/img/chatbot/RNN-bidirectional.png
new file mode 100755
index 00000000000..4dbec078f82
Binary files /dev/null and b/_static/img/chatbot/RNN-bidirectional.png differ
diff --git a/_static/img/chatbot/attn1.png b/_static/img/chatbot/attn1.png
new file mode 100755
index 00000000000..fe151c3cfb5
Binary files /dev/null and b/_static/img/chatbot/attn1.png differ
diff --git a/_static/img/chatbot/attn2.png b/_static/img/chatbot/attn2.png
new file mode 100755
index 00000000000..790f7430219
Binary files /dev/null and b/_static/img/chatbot/attn2.png differ
diff --git a/_static/img/chatbot/bot.png b/_static/img/chatbot/bot.png
new file mode 100755
index 00000000000..99502f546f0
Binary files /dev/null and b/_static/img/chatbot/bot.png differ
diff --git a/_static/img/chatbot/diff.png b/_static/img/chatbot/diff.png
new file mode 100644
index 00000000000..fc3cc56789b
Binary files /dev/null and b/_static/img/chatbot/diff.png differ
diff --git a/_static/img/chatbot/global_attn.png b/_static/img/chatbot/global_attn.png
new file mode 100755
index 00000000000..de18b75848a
Binary files /dev/null and b/_static/img/chatbot/global_attn.png differ
diff --git a/_static/img/chatbot/grad_clip.png b/_static/img/chatbot/grad_clip.png
new file mode 100755
index 00000000000..d9e5ca977b2
Binary files /dev/null and b/_static/img/chatbot/grad_clip.png differ
diff --git a/_static/img/chatbot/pytorch_workflow.png b/_static/img/chatbot/pytorch_workflow.png
new file mode 100644
index 00000000000..8a81f1995f0
Binary files /dev/null and b/_static/img/chatbot/pytorch_workflow.png differ
diff --git a/_static/img/chatbot/scores.png b/_static/img/chatbot/scores.png
new file mode 100755
index 00000000000..78f214f7508
Binary files /dev/null and b/_static/img/chatbot/scores.png differ
diff --git a/_static/img/chatbot/seq2seq_batches.png b/_static/img/chatbot/seq2seq_batches.png
new file mode 100755
index 00000000000..0543c445e2a
Binary files /dev/null and b/_static/img/chatbot/seq2seq_batches.png differ
diff --git a/_static/img/chatbot/seq2seq_ts.png b/_static/img/chatbot/seq2seq_ts.png
new file mode 100755
index 00000000000..f497297ac45
Binary files /dev/null and b/_static/img/chatbot/seq2seq_ts.png differ
diff --git a/images/cifar10.png b/_static/img/cifar10.png
old mode 100644
new mode 100755
similarity index 100%
rename from images/cifar10.png
rename to _static/img/cifar10.png
diff --git a/_static/img/classic_memory_format.png b/_static/img/classic_memory_format.png
new file mode 100644
index 00000000000..65cff010d88
Binary files /dev/null and b/_static/img/classic_memory_format.png differ
diff --git a/_static/img/compiled_autograd/call_hook_node.png b/_static/img/compiled_autograd/call_hook_node.png
new file mode 100644
index 00000000000..3e094cf6f73
Binary files /dev/null and b/_static/img/compiled_autograd/call_hook_node.png differ
diff --git a/_static/img/compiled_autograd/entire_verbose_log.png b/_static/img/compiled_autograd/entire_verbose_log.png
new file mode 100644
index 00000000000..4ce2b8538ee
Binary files /dev/null and b/_static/img/compiled_autograd/entire_verbose_log.png differ
diff --git a/_static/img/compiled_autograd/recompile_due_to_dynamic.png b/_static/img/compiled_autograd/recompile_due_to_dynamic.png
new file mode 100644
index 00000000000..41ae56acf2d
Binary files /dev/null and b/_static/img/compiled_autograd/recompile_due_to_dynamic.png differ
diff --git a/_static/img/compiled_autograd/recompile_due_to_node.png b/_static/img/compiled_autograd/recompile_due_to_node.png
new file mode 100644
index 00000000000..800a1784587
Binary files /dev/null and b/_static/img/compiled_autograd/recompile_due_to_node.png differ
diff --git a/_static/img/cpp-frontend/digits.png b/_static/img/cpp-frontend/digits.png
new file mode 100644
index 00000000000..0f7fa57543e
Binary files /dev/null and b/_static/img/cpp-frontend/digits.png differ
diff --git a/_static/img/cpp-pytorch.png b/_static/img/cpp-pytorch.png
new file mode 100644
index 00000000000..c19bcd8f964
Binary files /dev/null and b/_static/img/cpp-pytorch.png differ
diff --git a/_static/img/cpp_logo.png b/_static/img/cpp_logo.png
new file mode 100755
index 00000000000..432d4eb5a6e
Binary files /dev/null and b/_static/img/cpp_logo.png differ
diff --git a/_static/img/dag_autograd.png b/_static/img/dag_autograd.png
new file mode 100644
index 00000000000..cdc50fed625
Binary files /dev/null and b/_static/img/dag_autograd.png differ
diff --git a/_static/img/data_parallel.png b/_static/img/data_parallel.png
new file mode 100755
index 00000000000..5a729e9eae5
Binary files /dev/null and b/_static/img/data_parallel.png differ
diff --git a/_static/img/dcgan_generator.png b/_static/img/dcgan_generator.png
new file mode 100755
index 00000000000..9349577e4ea
Binary files /dev/null and b/_static/img/dcgan_generator.png differ
diff --git a/_static/img/deeplabv3_android.png b/_static/img/deeplabv3_android.png
new file mode 100644
index 00000000000..e0a451be8ef
Binary files /dev/null and b/_static/img/deeplabv3_android.png differ
diff --git a/_static/img/deeplabv3_android2.png b/_static/img/deeplabv3_android2.png
new file mode 100644
index 00000000000..0ae041479aa
Binary files /dev/null and b/_static/img/deeplabv3_android2.png differ
diff --git a/_static/img/deeplabv3_ios.png b/_static/img/deeplabv3_ios.png
new file mode 100644
index 00000000000..c901179e1ee
Binary files /dev/null and b/_static/img/deeplabv3_ios.png differ
diff --git a/_static/img/deeplabv3_ios2.png b/_static/img/deeplabv3_ios2.png
new file mode 100644
index 00000000000..3dc0073ca13
Binary files /dev/null and b/_static/img/deeplabv3_ios2.png differ
diff --git a/_static/img/distributed/DDP_memory.gif b/_static/img/distributed/DDP_memory.gif
new file mode 100644
index 00000000000..4049b9dba43
Binary files /dev/null and b/_static/img/distributed/DDP_memory.gif differ
diff --git a/_static/img/distributed/DistPyTorch.jpg b/_static/img/distributed/DistPyTorch.jpg
new file mode 100755
index 00000000000..65537a444f5
Binary files /dev/null and b/_static/img/distributed/DistPyTorch.jpg differ
diff --git a/_static/img/distributed/FSDP_autowrap.gif b/_static/img/distributed/FSDP_autowrap.gif
new file mode 100644
index 00000000000..d9e782d4c95
Binary files /dev/null and b/_static/img/distributed/FSDP_autowrap.gif differ
diff --git a/_static/img/distributed/FSDP_memory.gif b/_static/img/distributed/FSDP_memory.gif
new file mode 100644
index 00000000000..aece4e4b8a0
Binary files /dev/null and b/_static/img/distributed/FSDP_memory.gif differ
diff --git a/_static/img/distributed/all_gather.pdf b/_static/img/distributed/all_gather.pdf
new file mode 100755
index 00000000000..aadf4400432
Binary files /dev/null and b/_static/img/distributed/all_gather.pdf differ
diff --git a/_static/img/distributed/all_gather.png b/_static/img/distributed/all_gather.png
new file mode 100755
index 00000000000..6ccc912baf8
Binary files /dev/null and b/_static/img/distributed/all_gather.png differ
diff --git a/_static/img/distributed/all_reduce.pdf b/_static/img/distributed/all_reduce.pdf
new file mode 100755
index 00000000000..464574d8353
Binary files /dev/null and b/_static/img/distributed/all_reduce.pdf differ
diff --git a/_static/img/distributed/all_reduce.png b/_static/img/distributed/all_reduce.png
new file mode 100755
index 00000000000..789fb14739e
Binary files /dev/null and b/_static/img/distributed/all_reduce.png differ
diff --git a/_static/img/distributed/broadcast.png b/_static/img/distributed/broadcast.png
new file mode 100755
index 00000000000..3e615aeba5b
Binary files /dev/null and b/_static/img/distributed/broadcast.png differ
diff --git a/_static/img/distributed/device_mesh.png b/_static/img/distributed/device_mesh.png
new file mode 100644
index 00000000000..2ccabcc4824
Binary files /dev/null and b/_static/img/distributed/device_mesh.png differ
diff --git a/_static/img/distributed/distributed_checkpoint_generated_files.png b/_static/img/distributed/distributed_checkpoint_generated_files.png
new file mode 100644
index 00000000000..b32dddb7e62
Binary files /dev/null and b/_static/img/distributed/distributed_checkpoint_generated_files.png differ
diff --git a/_static/img/distributed/fsdp_implicit.png b/_static/img/distributed/fsdp_implicit.png
new file mode 100644
index 00000000000..85b19b7e72e
Binary files /dev/null and b/_static/img/distributed/fsdp_implicit.png differ
diff --git a/_static/img/distributed/fsdp_sharding.png b/_static/img/distributed/fsdp_sharding.png
new file mode 100755
index 00000000000..9dd1e3c111e
Binary files /dev/null and b/_static/img/distributed/fsdp_sharding.png differ
diff --git a/_static/img/distributed/fsdp_tp.png b/_static/img/distributed/fsdp_tp.png
new file mode 100644
index 00000000000..e419304ac7d
Binary files /dev/null and b/_static/img/distributed/fsdp_tp.png differ
diff --git a/_static/img/distributed/fsdp_workflow.png b/_static/img/distributed/fsdp_workflow.png
new file mode 100644
index 00000000000..1a8df0e44b6
Binary files /dev/null and b/_static/img/distributed/fsdp_workflow.png differ
diff --git a/_static/img/distributed/gather.png b/_static/img/distributed/gather.png
new file mode 100755
index 00000000000..195a1963830
Binary files /dev/null and b/_static/img/distributed/gather.png differ
diff --git a/_static/img/distributed/loss_parallel.png b/_static/img/distributed/loss_parallel.png
new file mode 100644
index 00000000000..b5cf9a499bc
Binary files /dev/null and b/_static/img/distributed/loss_parallel.png differ
diff --git a/_static/img/distributed/megatron_lm.png b/_static/img/distributed/megatron_lm.png
new file mode 100644
index 00000000000..38f7b06639f
Binary files /dev/null and b/_static/img/distributed/megatron_lm.png differ
diff --git a/_static/img/distributed/reduce.png b/_static/img/distributed/reduce.png
new file mode 100755
index 00000000000..a185ceb75c1
Binary files /dev/null and b/_static/img/distributed/reduce.png differ
diff --git a/_static/img/distributed/scatter.png b/_static/img/distributed/scatter.png
new file mode 100755
index 00000000000..140cd7c7d72
Binary files /dev/null and b/_static/img/distributed/scatter.png differ
diff --git a/_static/img/distributed/send_recv.png b/_static/img/distributed/send_recv.png
new file mode 100755
index 00000000000..d60e11e72fe
Binary files /dev/null and b/_static/img/distributed/send_recv.png differ
diff --git a/_static/img/distributed/send_recv_big.png b/_static/img/distributed/send_recv_big.png
new file mode 100755
index 00000000000..1ef9154c89e
Binary files /dev/null and b/_static/img/distributed/send_recv_big.png differ
diff --git a/_static/img/distributed/tcpstore_barrier_time.png b/_static/img/distributed/tcpstore_barrier_time.png
new file mode 100644
index 00000000000..5ece3a7471d
Binary files /dev/null and b/_static/img/distributed/tcpstore_barrier_time.png differ
diff --git a/_static/img/distributed/tcpstore_init_time.png b/_static/img/distributed/tcpstore_init_time.png
new file mode 100644
index 00000000000..df514b4dc48
Binary files /dev/null and b/_static/img/distributed/tcpstore_init_time.png differ
diff --git a/_static/img/dynamic_graph.gif b/_static/img/dynamic_graph.gif
new file mode 100755
index 00000000000..b4f17374e03
Binary files /dev/null and b/_static/img/dynamic_graph.gif differ
diff --git a/_static/img/fgsm_panda_image.png b/_static/img/fgsm_panda_image.png
new file mode 100755
index 00000000000..c10a73b605f
Binary files /dev/null and b/_static/img/fgsm_panda_image.png differ
diff --git a/_static/img/flask.png b/_static/img/flask.png
new file mode 100644
index 00000000000..bad6738efdd
Binary files /dev/null and b/_static/img/flask.png differ
diff --git a/_static/img/half_cheetah.gif b/_static/img/half_cheetah.gif
new file mode 100644
index 00000000000..b61ff47d4e6
Binary files /dev/null and b/_static/img/half_cheetah.gif differ
diff --git a/_static/img/hta/comm_across_ranks.png b/_static/img/hta/comm_across_ranks.png
new file mode 100644
index 00000000000..2336de3bcbc
Binary files /dev/null and b/_static/img/hta/comm_across_ranks.png differ
diff --git a/_static/img/hta/counts_diff.png b/_static/img/hta/counts_diff.png
new file mode 100644
index 00000000000..34575c145de
Binary files /dev/null and b/_static/img/hta/counts_diff.png differ
diff --git a/_static/img/hta/cuda_kernel_launch.png b/_static/img/hta/cuda_kernel_launch.png
new file mode 100644
index 00000000000..e57c54a2fc5
Binary files /dev/null and b/_static/img/hta/cuda_kernel_launch.png differ
diff --git a/_static/img/hta/cuda_kernel_launch_stats.png b/_static/img/hta/cuda_kernel_launch_stats.png
new file mode 100644
index 00000000000..33a160fc752
Binary files /dev/null and b/_static/img/hta/cuda_kernel_launch_stats.png differ
diff --git a/_static/img/hta/duration_diff.png b/_static/img/hta/duration_diff.png
new file mode 100644
index 00000000000..050d491c872
Binary files /dev/null and b/_static/img/hta/duration_diff.png differ
diff --git a/_static/img/hta/idle_time.png b/_static/img/hta/idle_time.png
new file mode 100644
index 00000000000..782bfe9adb5
Binary files /dev/null and b/_static/img/hta/idle_time.png differ
diff --git a/_static/img/hta/idle_time_breakdown_percentage.png b/_static/img/hta/idle_time_breakdown_percentage.png
new file mode 100644
index 00000000000..3bab5946eab
Binary files /dev/null and b/_static/img/hta/idle_time_breakdown_percentage.png differ
diff --git a/_static/img/hta/idle_time_summary.png b/_static/img/hta/idle_time_summary.png
new file mode 100644
index 00000000000..101b696b534
Binary files /dev/null and b/_static/img/hta/idle_time_summary.png differ
diff --git a/_static/img/hta/kernel_metrics_df.png b/_static/img/hta/kernel_metrics_df.png
new file mode 100644
index 00000000000..53eefb58b0c
Binary files /dev/null and b/_static/img/hta/kernel_metrics_df.png differ
diff --git a/_static/img/hta/kernel_type_breakdown.png b/_static/img/hta/kernel_type_breakdown.png
new file mode 100644
index 00000000000..29a29cf89b2
Binary files /dev/null and b/_static/img/hta/kernel_type_breakdown.png differ
diff --git a/_static/img/hta/launch_delay_outliers.png b/_static/img/hta/launch_delay_outliers.png
new file mode 100644
index 00000000000..9bb455adea4
Binary files /dev/null and b/_static/img/hta/launch_delay_outliers.png differ
diff --git a/_static/img/hta/mem_bandwidth_queue_length.png b/_static/img/hta/mem_bandwidth_queue_length.png
new file mode 100644
index 00000000000..9df5383b5d9
Binary files /dev/null and b/_static/img/hta/mem_bandwidth_queue_length.png differ
diff --git a/_static/img/hta/overlap_df.png b/_static/img/hta/overlap_df.png
new file mode 100644
index 00000000000..ef164a28a12
Binary files /dev/null and b/_static/img/hta/overlap_df.png differ
diff --git a/_static/img/hta/overlap_plot.png b/_static/img/hta/overlap_plot.png
new file mode 100644
index 00000000000..acd449bc7ff
Binary files /dev/null and b/_static/img/hta/overlap_plot.png differ
diff --git a/_static/img/hta/pie_charts.png b/_static/img/hta/pie_charts.png
new file mode 100644
index 00000000000..fa9137109a6
Binary files /dev/null and b/_static/img/hta/pie_charts.png differ
diff --git a/_static/img/hta/queue_length_summary.png b/_static/img/hta/queue_length_summary.png
new file mode 100644
index 00000000000..639a03fb6d1
Binary files /dev/null and b/_static/img/hta/queue_length_summary.png differ
diff --git a/_static/img/hta/runtime_outliers.png b/_static/img/hta/runtime_outliers.png
new file mode 100644
index 00000000000..1e2dfff9006
Binary files /dev/null and b/_static/img/hta/runtime_outliers.png differ
diff --git a/_static/img/hta/short_gpu_kernels.png b/_static/img/hta/short_gpu_kernels.png
new file mode 100644
index 00000000000..ff382a3a7f0
Binary files /dev/null and b/_static/img/hta/short_gpu_kernels.png differ
diff --git a/_static/img/hta/temporal_breakdown_df.png b/_static/img/hta/temporal_breakdown_df.png
new file mode 100644
index 00000000000..dce1829d113
Binary files /dev/null and b/_static/img/hta/temporal_breakdown_df.png differ
diff --git a/_static/img/hta/temporal_breakdown_plot.png b/_static/img/hta/temporal_breakdown_plot.png
new file mode 100644
index 00000000000..9c5f45c1d35
Binary files /dev/null and b/_static/img/hta/temporal_breakdown_plot.png differ
diff --git a/_static/img/hybrid_frontend/220px-KnnClassification.png b/_static/img/hybrid_frontend/220px-KnnClassification.png
new file mode 100755
index 00000000000..fb9ee948f45
Binary files /dev/null and b/_static/img/hybrid_frontend/220px-KnnClassification.png differ
diff --git a/_static/img/hybrid_frontend/iris_pic.jpg b/_static/img/hybrid_frontend/iris_pic.jpg
new file mode 100755
index 00000000000..85bb9b49393
Binary files /dev/null and b/_static/img/hybrid_frontend/iris_pic.jpg differ
diff --git a/_static/img/hybrid_frontend/pytorch_workflow_small.jpg b/_static/img/hybrid_frontend/pytorch_workflow_small.jpg
new file mode 100755
index 00000000000..8b3d91b1df5
Binary files /dev/null and b/_static/img/hybrid_frontend/pytorch_workflow_small.jpg differ
diff --git a/_static/img/install_msvc.png b/_static/img/install_msvc.png
new file mode 100644
index 00000000000..fce73207a80
Binary files /dev/null and b/_static/img/install_msvc.png differ
diff --git a/_static/img/invpendulum.gif b/_static/img/invpendulum.gif
new file mode 100644
index 00000000000..3102c5b55cf
Binary files /dev/null and b/_static/img/invpendulum.gif differ
diff --git a/_static/img/itt_tutorial/vtune_config.png b/_static/img/itt_tutorial/vtune_config.png
new file mode 100755
index 00000000000..9f3c4605022
Binary files /dev/null and b/_static/img/itt_tutorial/vtune_config.png differ
diff --git a/_static/img/itt_tutorial/vtune_start.png b/_static/img/itt_tutorial/vtune_start.png
new file mode 100755
index 00000000000..9460df7c5f3
Binary files /dev/null and b/_static/img/itt_tutorial/vtune_start.png differ
diff --git a/_static/img/itt_tutorial/vtune_timeline.png b/_static/img/itt_tutorial/vtune_timeline.png
new file mode 100755
index 00000000000..1f1f018e3fa
Binary files /dev/null and b/_static/img/itt_tutorial/vtune_timeline.png differ
diff --git a/_static/img/itt_tutorial/vtune_xpu_config.png b/_static/img/itt_tutorial/vtune_xpu_config.png
new file mode 100644
index 00000000000..80dd1812d26
Binary files /dev/null and b/_static/img/itt_tutorial/vtune_xpu_config.png differ
diff --git a/_static/img/itt_tutorial/vtune_xpu_timeline.png b/_static/img/itt_tutorial/vtune_xpu_timeline.png
new file mode 100644
index 00000000000..43818cf105c
Binary files /dev/null and b/_static/img/itt_tutorial/vtune_xpu_timeline.png differ
diff --git a/_static/img/knowledge_distillation/ce_only.png b/_static/img/knowledge_distillation/ce_only.png
new file mode 100644
index 00000000000..a7503716575
Binary files /dev/null and b/_static/img/knowledge_distillation/ce_only.png differ
diff --git a/_static/img/knowledge_distillation/cosine_embedding_loss.png b/_static/img/knowledge_distillation/cosine_embedding_loss.png
new file mode 100644
index 00000000000..ebfd957a250
Binary files /dev/null and b/_static/img/knowledge_distillation/cosine_embedding_loss.png differ
diff --git a/_static/img/knowledge_distillation/cosine_loss_distillation.png b/_static/img/knowledge_distillation/cosine_loss_distillation.png
new file mode 100644
index 00000000000..81f241eb07f
Binary files /dev/null and b/_static/img/knowledge_distillation/cosine_loss_distillation.png differ
diff --git a/_static/img/knowledge_distillation/distillation_output_loss.png b/_static/img/knowledge_distillation/distillation_output_loss.png
new file mode 100644
index 00000000000..f86cbddbdfd
Binary files /dev/null and b/_static/img/knowledge_distillation/distillation_output_loss.png differ
diff --git a/_static/img/knowledge_distillation/fitnets_knowledge_distill.png b/_static/img/knowledge_distillation/fitnets_knowledge_distill.png
new file mode 100644
index 00000000000..407d9de89f6
Binary files /dev/null and b/_static/img/knowledge_distillation/fitnets_knowledge_distill.png differ
diff --git a/_static/img/landmarked_face2.png b/_static/img/landmarked_face2.png
new file mode 100755
index 00000000000..2faa58b37f2
Binary files /dev/null and b/_static/img/landmarked_face2.png differ
diff --git a/_static/img/mario.gif b/_static/img/mario.gif
new file mode 100644
index 00000000000..95d8c0cb172
Binary files /dev/null and b/_static/img/mario.gif differ
diff --git a/_static/img/mario_env.png b/_static/img/mario_env.png
new file mode 100644
index 00000000000..b6fc09c3c8c
Binary files /dev/null and b/_static/img/mario_env.png differ
diff --git a/_static/img/memory_format_logo.png b/_static/img/memory_format_logo.png
new file mode 100644
index 00000000000..6d1043ed29a
Binary files /dev/null and b/_static/img/memory_format_logo.png differ
diff --git a/_static/img/mnist.png b/_static/img/mnist.png
new file mode 100644
index 00000000000..53c876a89d5
Binary files /dev/null and b/_static/img/mnist.png differ
diff --git a/_static/img/model-parallel-images/mp_vs_rn.png b/_static/img/model-parallel-images/mp_vs_rn.png
new file mode 100644
index 00000000000..c56ec8adf51
Binary files /dev/null and b/_static/img/model-parallel-images/mp_vs_rn.png differ
diff --git a/_static/img/model-parallel-images/mp_vs_rn_vs_pp.png b/_static/img/model-parallel-images/mp_vs_rn_vs_pp.png
new file mode 100644
index 00000000000..a102c916771
Binary files /dev/null and b/_static/img/model-parallel-images/mp_vs_rn_vs_pp.png differ
diff --git a/_static/img/model-parallel-images/split_size_tradeoff.png b/_static/img/model-parallel-images/split_size_tradeoff.png
new file mode 100644
index 00000000000..f30eba44637
Binary files /dev/null and b/_static/img/model-parallel-images/split_size_tradeoff.png differ
diff --git a/_static/img/named_tensor.png b/_static/img/named_tensor.png
new file mode 100644
index 00000000000..2efceb9f516
Binary files /dev/null and b/_static/img/named_tensor.png differ
diff --git a/_static/img/neural-style/dancing.jpg b/_static/img/neural-style/dancing.jpg
new file mode 100755
index 00000000000..4bb9da7e26a
Binary files /dev/null and b/_static/img/neural-style/dancing.jpg differ
diff --git a/_static/img/neural-style/neuralstyle.png b/_static/img/neural-style/neuralstyle.png
new file mode 100755
index 00000000000..5831c08b8c7
Binary files /dev/null and b/_static/img/neural-style/neuralstyle.png differ
diff --git a/_static/img/neural-style/picasso.jpg b/_static/img/neural-style/picasso.jpg
new file mode 100755
index 00000000000..62ad5d2a2f6
Binary files /dev/null and b/_static/img/neural-style/picasso.jpg differ
diff --git a/_static/img/neural-style/sphx_glr_neural_style_tutorial_001.png b/_static/img/neural-style/sphx_glr_neural_style_tutorial_001.png
new file mode 100755
index 00000000000..cd86198719b
Binary files /dev/null and b/_static/img/neural-style/sphx_glr_neural_style_tutorial_001.png differ
diff --git a/_static/img/neural-style/sphx_glr_neural_style_tutorial_002.png b/_static/img/neural-style/sphx_glr_neural_style_tutorial_002.png
new file mode 100755
index 00000000000..75af8cb43e8
Binary files /dev/null and b/_static/img/neural-style/sphx_glr_neural_style_tutorial_002.png differ
diff --git a/_static/img/neural-style/sphx_glr_neural_style_tutorial_003.png b/_static/img/neural-style/sphx_glr_neural_style_tutorial_003.png
new file mode 100755
index 00000000000..70cf7fe2cf7
Binary files /dev/null and b/_static/img/neural-style/sphx_glr_neural_style_tutorial_003.png differ
diff --git a/_static/img/neural-style/sphx_glr_neural_style_tutorial_004.png b/_static/img/neural-style/sphx_glr_neural_style_tutorial_004.png
new file mode 100755
index 00000000000..2168b343341
Binary files /dev/null and b/_static/img/neural-style/sphx_glr_neural_style_tutorial_004.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_transformer_block.png b/_static/img/nvfuser_intro/nvfuser_transformer_block.png
new file mode 100755
index 00000000000..8dd88bbdf94
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_transformer_block.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_tutorial_0.png b/_static/img/nvfuser_intro/nvfuser_tutorial_0.png
new file mode 100755
index 00000000000..d3448d192bc
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_tutorial_0.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_tutorial_1.png b/_static/img/nvfuser_intro/nvfuser_tutorial_1.png
new file mode 100755
index 00000000000..4752695fa91
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_tutorial_1.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_tutorial_2.png b/_static/img/nvfuser_intro/nvfuser_tutorial_2.png
new file mode 100755
index 00000000000..ec45793d67d
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_tutorial_2.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_tutorial_3.png b/_static/img/nvfuser_intro/nvfuser_tutorial_3.png
new file mode 100755
index 00000000000..be529d93259
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_tutorial_3.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_tutorial_4.png b/_static/img/nvfuser_intro/nvfuser_tutorial_4.png
new file mode 100755
index 00000000000..f2e7c3ff339
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_tutorial_4.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_tutorial_5.png b/_static/img/nvfuser_intro/nvfuser_tutorial_5.png
new file mode 100755
index 00000000000..efe43d73741
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_tutorial_5.png differ
diff --git a/_static/img/nvfuser_intro/nvfuser_tutorial_6.png b/_static/img/nvfuser_intro/nvfuser_tutorial_6.png
new file mode 100755
index 00000000000..59a54a87f86
Binary files /dev/null and b/_static/img/nvfuser_intro/nvfuser_tutorial_6.png differ
diff --git a/_static/img/oneworker.png b/_static/img/oneworker.png
new file mode 100644
index 00000000000..255ec584834
Binary files /dev/null and b/_static/img/oneworker.png differ
diff --git a/_static/img/onnx/image_classifier_onnx_model_on_netron_web_ui.png b/_static/img/onnx/image_classifier_onnx_model_on_netron_web_ui.png
new file mode 100644
index 00000000000..6430e4943ff
Binary files /dev/null and b/_static/img/onnx/image_classifier_onnx_model_on_netron_web_ui.png differ
diff --git a/_static/img/onnx/netron_web_ui.png b/_static/img/onnx/netron_web_ui.png
new file mode 100755
index 00000000000..f88936eb824
Binary files /dev/null and b/_static/img/onnx/netron_web_ui.png differ
diff --git a/_static/img/optim_step_in_bwd/snapshot.jpg b/_static/img/optim_step_in_bwd/snapshot.jpg
new file mode 100644
index 00000000000..50be55e7b9a
Binary files /dev/null and b/_static/img/optim_step_in_bwd/snapshot.jpg differ
diff --git a/_static/img/optim_step_in_bwd/snapshot_opt_in_bwd.jpg b/_static/img/optim_step_in_bwd/snapshot_opt_in_bwd.jpg
new file mode 100644
index 00000000000..65d53d21c38
Binary files /dev/null and b/_static/img/optim_step_in_bwd/snapshot_opt_in_bwd.jpg differ
diff --git a/_static/img/panda.png b/_static/img/panda.png
new file mode 100755
index 00000000000..dd717fec882
Binary files /dev/null and b/_static/img/panda.png differ
diff --git a/_static/img/pendulum.gif b/_static/img/pendulum.gif
new file mode 100644
index 00000000000..a7adf181fc8
Binary files /dev/null and b/_static/img/pendulum.gif differ
diff --git a/_static/img/per_channel_quant.png b/_static/img/per_channel_quant.png
new file mode 100644
index 00000000000..e28810aca3e
Binary files /dev/null and b/_static/img/per_channel_quant.png differ
diff --git a/_static/img/per_tensor_quant.png b/_static/img/per_tensor_quant.png
new file mode 100644
index 00000000000..183bab6fa3b
Binary files /dev/null and b/_static/img/per_tensor_quant.png differ
diff --git a/_static/img/perf_viz.png b/_static/img/perf_viz.png
new file mode 100644
index 00000000000..85608557bcb
Binary files /dev/null and b/_static/img/perf_viz.png differ
diff --git a/_static/img/pinmem/pinmem.png b/_static/img/pinmem/pinmem.png
new file mode 100644
index 00000000000..9d84e9d229d
Binary files /dev/null and b/_static/img/pinmem/pinmem.png differ
diff --git a/_static/img/pinmem/trace_streamed0_pinned0.png b/_static/img/pinmem/trace_streamed0_pinned0.png
new file mode 100644
index 00000000000..dedac997b0b
Binary files /dev/null and b/_static/img/pinmem/trace_streamed0_pinned0.png differ
diff --git a/_static/img/pinmem/trace_streamed0_pinned1.png b/_static/img/pinmem/trace_streamed0_pinned1.png
new file mode 100644
index 00000000000..2d5ff462e1a
Binary files /dev/null and b/_static/img/pinmem/trace_streamed0_pinned1.png differ
diff --git a/_static/img/pinmem/trace_streamed1_pinned0.png b/_static/img/pinmem/trace_streamed1_pinned0.png
new file mode 100644
index 00000000000..130182a1978
Binary files /dev/null and b/_static/img/pinmem/trace_streamed1_pinned0.png differ
diff --git a/_static/img/pinmem/trace_streamed1_pinned1.png b/_static/img/pinmem/trace_streamed1_pinned1.png
new file mode 100644
index 00000000000..c596fcdb691
Binary files /dev/null and b/_static/img/pinmem/trace_streamed1_pinned1.png differ
diff --git a/_static/img/profiler_callstack.png b/_static/img/profiler_callstack.png
new file mode 100644
index 00000000000..835673ba63c
Binary files /dev/null and b/_static/img/profiler_callstack.png differ
diff --git a/_static/img/profiler_distributed_view.png b/_static/img/profiler_distributed_view.png
new file mode 100644
index 00000000000..2b0d5565131
Binary files /dev/null and b/_static/img/profiler_distributed_view.png differ
diff --git a/_static/img/profiler_kernel_view.png b/_static/img/profiler_kernel_view.png
new file mode 100644
index 00000000000..cfe01b83a0d
Binary files /dev/null and b/_static/img/profiler_kernel_view.png differ
diff --git a/_static/img/profiler_memory_curve_selecting.png b/_static/img/profiler_memory_curve_selecting.png
new file mode 100644
index 00000000000..b5dc0c10e9b
Binary files /dev/null and b/_static/img/profiler_memory_curve_selecting.png differ
diff --git a/_static/img/profiler_memory_curve_single.png b/_static/img/profiler_memory_curve_single.png
new file mode 100644
index 00000000000..c12d480ac40
Binary files /dev/null and b/_static/img/profiler_memory_curve_single.png differ
diff --git a/_static/img/profiler_memory_view.png b/_static/img/profiler_memory_view.png
new file mode 100644
index 00000000000..4839505ab8c
Binary files /dev/null and b/_static/img/profiler_memory_view.png differ
diff --git a/_static/img/profiler_operator_view.png b/_static/img/profiler_operator_view.png
new file mode 100644
index 00000000000..e3e60b03025
Binary files /dev/null and b/_static/img/profiler_operator_view.png differ
diff --git a/_static/img/profiler_overview1.png b/_static/img/profiler_overview1.png
new file mode 100644
index 00000000000..01eef8fda68
Binary files /dev/null and b/_static/img/profiler_overview1.png differ
diff --git a/_static/img/profiler_overview2.png b/_static/img/profiler_overview2.png
new file mode 100644
index 00000000000..cc7826b352a
Binary files /dev/null and b/_static/img/profiler_overview2.png differ
diff --git a/_static/img/profiler_rocm_chrome_trace_view.png b/_static/img/profiler_rocm_chrome_trace_view.png
new file mode 100644
index 00000000000..cff7ba98c8a
Binary files /dev/null and b/_static/img/profiler_rocm_chrome_trace_view.png differ
diff --git a/_static/img/profiler_rocm_tensorboard_operartor_view.png b/_static/img/profiler_rocm_tensorboard_operartor_view.png
new file mode 100644
index 00000000000..27effb91e7c
Binary files /dev/null and b/_static/img/profiler_rocm_tensorboard_operartor_view.png differ
diff --git a/_static/img/profiler_trace_view1.png b/_static/img/profiler_trace_view1.png
new file mode 100644
index 00000000000..215fe03e724
Binary files /dev/null and b/_static/img/profiler_trace_view1.png differ
diff --git a/_static/img/profiler_trace_view2.png b/_static/img/profiler_trace_view2.png
new file mode 100644
index 00000000000..790ef5d58ea
Binary files /dev/null and b/_static/img/profiler_trace_view2.png differ
diff --git a/_static/img/profiler_trace_view_fwd_bwd.png b/_static/img/profiler_trace_view_fwd_bwd.png
new file mode 100644
index 00000000000..c773b829e5d
Binary files /dev/null and b/_static/img/profiler_trace_view_fwd_bwd.png differ
diff --git a/_static/img/profiler_views_list.png b/_static/img/profiler_views_list.png
new file mode 100644
index 00000000000..040f392e366
Binary files /dev/null and b/_static/img/profiler_views_list.png differ
diff --git a/_static/img/profiler_vscode.png b/_static/img/profiler_vscode.png
new file mode 100644
index 00000000000..afb99f06937
Binary files /dev/null and b/_static/img/profiler_vscode.png differ
diff --git a/_static/img/pruning.png b/_static/img/pruning.png
new file mode 100644
index 00000000000..7359f11e9a6
Binary files /dev/null and b/_static/img/pruning.png differ
diff --git a/_static/img/pruning_flow.jpg b/_static/img/pruning_flow.jpg
new file mode 100644
index 00000000000..bd57158b302
Binary files /dev/null and b/_static/img/pruning_flow.jpg differ
diff --git a/_static/img/python_extension_autoload_impl.png b/_static/img/python_extension_autoload_impl.png
new file mode 100644
index 00000000000..64e18fc7b4b
Binary files /dev/null and b/_static/img/python_extension_autoload_impl.png differ
diff --git a/_static/img/pytorch-logo-dark.png b/_static/img/pytorch-logo-dark.png
new file mode 100755
index 00000000000..7992605b01f
Binary files /dev/null and b/_static/img/pytorch-logo-dark.png differ
diff --git a/_static/img/pytorch-logo-dark.svg b/_static/img/pytorch-logo-dark.svg
new file mode 100755
index 00000000000..5e530003858
--- /dev/null
+++ b/_static/img/pytorch-logo-dark.svg
@@ -0,0 +1,33 @@
+
+
+
diff --git a/_static/img/qat.png b/_static/img/qat.png
new file mode 100644
index 00000000000..e8ca311745c
Binary files /dev/null and b/_static/img/qat.png differ
diff --git a/_static/img/quant_embeddings.png b/_static/img/quant_embeddings.png
new file mode 100644
index 00000000000..035561465a2
Binary files /dev/null and b/_static/img/quant_embeddings.png differ
diff --git a/_static/img/ray-tune.png b/_static/img/ray-tune.png
new file mode 100644
index 00000000000..febd6de282e
Binary files /dev/null and b/_static/img/ray-tune.png differ
diff --git a/_static/img/reinforcement_learning_diagram.drawio b/_static/img/reinforcement_learning_diagram.drawio
new file mode 100644
index 00000000000..2ff4e6f0270
--- /dev/null
+++ b/_static/img/reinforcement_learning_diagram.drawio
@@ -0,0 +1 @@
+5Vpbc+MmFP41nmkfmpGEpMiPjTftzrTZZtbbbbYvHSxhiRQJFeHb/vqChG4gx95ElqfTeCaGwwEO37lwDskMLNL9zwzmyQONEJk5VrSfgXczx7GtuSW+JOVQUXzXrggxw5FiaglL/BXVMxV1gyNU9Bg5pYTjvE8MaZahkPdokDG667OtKenvmsMYGYRlCIlJ/QNHPKmogWe19PcIx0m9s22pkRTWzIpQJDCiuw4J3M/AglHKq1a6XyAiwatxqeb9dGS0EYyhjJ8z4flQfCient3PwfKXVfTn40P6/vMPapUtJBt14I8oJ/AgaA8opeygZOeHGhBGN1mE5JrWDNztEszRMoehHN0JExC0hKdE9GzRXGNCFpRQVs4Fa09+BD1mMMJC7s7YqvyIsYIz+jfqjPjljxhRsiLG0f4oCHYDrbBJRFPE5SEsNcGt1aPMMVDdXatbu1ZY0tGrr2hQmVPcrNwiLhoK9G9QgGMo4Lec41T6gWN9535v4C/WFLaPTmM/AlrebR8t2z0TruBScAEDLgaziKaCtoI8TAy4wg3bltYqIUFZ9KOMCaIbElgUOOxD1rftai0UGQFCg0/sRzcsRKfdjEMWI37KGkx1dOD2BtCuaQwRyPG2L+6QCtQOjxSLgxz1DRdoWqyOqWZ1I42+kNdfCOjmUOFgLCR0U0aemi2XDMVxgQ3ztK0X5fJtjR/0+EWjkqA1z0YHr7dYz7DYR0pwKM/5AfFRw2sEUbAOh8PrLYI+sgbDaxig1foy4dWxrh1fAzOACodeqi5lPKExzSC5b6laGGh5fqU0V8g/I84PKluBG06HQu8okcN/W+Q4OyS8CWTfsPFPlbz/Cxu/eg5hm0nEmPcg2mP+JNs3nup96Yy823c7h/HvTu8/cXfqV9H8lXen5xxJuUa+O91A2yd4+e709LvTP8Hvvsh/mbvWNtPDUR0hE+I9NVNF50vrFrLb+kLZq52hcaCO+9hTuY9zpvt413Qf3Vqc29e6z1xbyJkm9TSKb51fS4mdKVJP2zXc4fc8grwsLb3rlpaODog3cItaU96ijUdNmirWscHuXq03jjdVeLDnZ8aHI+qcJsGspewFdp8Iee8ivJU7Ehxn5YD/z0a+qN0RtOZtT7Ri9Q1Tac3ZqsjLvvWJQZzhLBbNUmtqXSFnuXQzb5zd7Bvxa5FQWkgvbB4vJDxCgXVlCEOOaXZhURwpylJQiRQFZdsL7wfkfh9RSFkkGql6XrQ2KiRddG9X7t2+rF10L6/ElUpu5VZ/ZWUt1D/piuk76/K8pWyq5S+lHiVi23oGaA9E7PlAxG4Yxw/ZZr4X1q5Vu9AE6V8wP5UAyt4jYlgcG7HrlUhVGL1WkgeO5EDf/r5oDdcuo9dIeqUPXk7ygK/xn3iPNACxJkgKHTMpHNVJBmod6+Z2snzmqmWMrlCgVx/nWjjQLc+7jIUDvYw5ZeFA43emsFjzCf0iYd2ava6q7z2LTVbX18XdyaDvX9UjNIMBevl2tkdo71VATyrG8ghd4LcV6qLb/oW/Ym//TwLc/ws=
\ No newline at end of file
diff --git a/_static/img/reinforcement_learning_diagram.jpg b/_static/img/reinforcement_learning_diagram.jpg
new file mode 100644
index 00000000000..7e04efc2534
Binary files /dev/null and b/_static/img/reinforcement_learning_diagram.jpg differ
diff --git a/_static/img/replaybuffer_traj.png b/_static/img/replaybuffer_traj.png
new file mode 100644
index 00000000000..64773ee8f78
Binary files /dev/null and b/_static/img/replaybuffer_traj.png differ
diff --git a/_static/img/rnnclass.png b/_static/img/rnnclass.png
new file mode 100755
index 00000000000..ff34c9e63ee
Binary files /dev/null and b/_static/img/rnnclass.png differ
diff --git a/_static/img/rollout_recurrent.png b/_static/img/rollout_recurrent.png
new file mode 100644
index 00000000000..2ce24d40d23
Binary files /dev/null and b/_static/img/rollout_recurrent.png differ
diff --git a/_static/img/rpc-images/batch.png b/_static/img/rpc-images/batch.png
new file mode 100644
index 00000000000..cde410d1bd1
Binary files /dev/null and b/_static/img/rpc-images/batch.png differ
diff --git a/_static/img/rpc_trace_img.png b/_static/img/rpc_trace_img.png
new file mode 100644
index 00000000000..4faaf97ad47
Binary files /dev/null and b/_static/img/rpc_trace_img.png differ
diff --git a/_static/img/sample_file.jpeg b/_static/img/sample_file.jpeg
new file mode 100644
index 00000000000..a7b314bd969
Binary files /dev/null and b/_static/img/sample_file.jpeg differ
diff --git a/_static/img/scipynumpy.png b/_static/img/scipynumpy.png
new file mode 100755
index 00000000000..d730af1a4f4
Binary files /dev/null and b/_static/img/scipynumpy.png differ
diff --git a/_static/img/seq-seq-images/attention-decoder-network.dot b/_static/img/seq-seq-images/attention-decoder-network.dot
new file mode 100755
index 00000000000..17eecc26a7a
--- /dev/null
+++ b/_static/img/seq-seq-images/attention-decoder-network.dot
@@ -0,0 +1,51 @@
+digraph G {
+
+ // Main styles
+ nodesep=0.3; ranksep=0.15;
+
+ node [shape=rect, fillcolor=darkorange, color=white, style=filled, fontsize=11, fontname="arial", height=0.2];
+ edge [color=gray, arrowsize=0.5];
+
+ // Layout
+ {rank=same;input;prev_hidden;encoder_outputs}
+
+
+ input -> embedding;
+ embedding -> dropout;
+ dropout -> embedded;
+
+ embedded -> attn;
+ prev_hidden -> attn;
+ attn -> attn_softmax;
+ attn_softmax -> attn_weights;
+ attn_weights -> bmm;
+ encoder_outputs -> bmm;
+ bmm -> attn_applied;
+ attn_applied -> attn_combine;
+ embedded -> attn_combine;
+
+ attn_combine -> relu -> gru;
+ prev_hidden -> gru;
+ gru -> out;
+ gru -> hidden;
+
+ out -> softmax;
+ softmax -> output;
+
+ {rank=same;output;hidden}
+
+ // Layer nodes
+ embedding [fillcolor=dodgerblue, fontcolor=white];
+ attn [fillcolor=dodgerblue, fontcolor=white];
+ attn_combine [fillcolor=dodgerblue, fontcolor=white];
+ bmm [fillcolor=dodgerblue, fontcolor=white];
+ gru [fillcolor=dodgerblue, fontcolor=white];
+ out [fillcolor=dodgerblue, fontcolor=white];
+
+ // Function nodes
+ dropout [fillcolor=palegreen];
+ relu [fillcolor=palegreen];
+ softmax [fillcolor=palegreen];
+ attn_softmax [fillcolor=palegreen];
+
+}
diff --git a/_static/img/seq-seq-images/attention-decoder-network.png b/_static/img/seq-seq-images/attention-decoder-network.png
new file mode 100755
index 00000000000..d31d42a5af1
Binary files /dev/null and b/_static/img/seq-seq-images/attention-decoder-network.png differ
diff --git a/_static/img/seq-seq-images/decoder-network.dot b/_static/img/seq-seq-images/decoder-network.dot
new file mode 100755
index 00000000000..db8a1cdfaee
--- /dev/null
+++ b/_static/img/seq-seq-images/decoder-network.dot
@@ -0,0 +1,34 @@
+digraph G {
+
+ // Main styles
+ nodesep=0.3; ranksep=0.15;
+
+ node [shape=rect, fillcolor=darkorange, color=white, style=filled, fontsize=11, fontname="arial", height=0.2];
+ edge [color=gray, arrowsize=0.5];
+
+ // Layout
+ {rank=same;input;prev_hidden}
+
+ input -> embedding;
+ embedding -> relu;
+ relu -> gru;
+
+ prev_hidden -> gru;
+ gru -> out;
+ gru -> hidden;
+
+ out -> softmax;
+ softmax -> output;
+
+ {rank=same;output;hidden}
+
+ // Layer nodes
+ embedding [fillcolor=dodgerblue, fontcolor=white];
+ gru [fillcolor=dodgerblue, fontcolor=white];
+ out [fillcolor=dodgerblue, fontcolor=white];
+
+ // Function nodes
+ relu [fillcolor=palegreen];
+ softmax [fillcolor=palegreen];
+
+}
diff --git a/_static/img/seq-seq-images/decoder-network.png b/_static/img/seq-seq-images/decoder-network.png
new file mode 100755
index 00000000000..54931fcd206
Binary files /dev/null and b/_static/img/seq-seq-images/decoder-network.png differ
diff --git a/_static/img/seq-seq-images/decoder.png b/_static/img/seq-seq-images/decoder.png
new file mode 100755
index 00000000000..1e1ecd3f953
Binary files /dev/null and b/_static/img/seq-seq-images/decoder.png differ
diff --git a/_static/img/seq-seq-images/decoder@2x.png b/_static/img/seq-seq-images/decoder@2x.png
new file mode 100755
index 00000000000..b32b7d3aafd
Binary files /dev/null and b/_static/img/seq-seq-images/decoder@2x.png differ
diff --git a/_static/img/seq-seq-images/encoder-network.dot b/_static/img/seq-seq-images/encoder-network.dot
new file mode 100755
index 00000000000..9bf07421681
--- /dev/null
+++ b/_static/img/seq-seq-images/encoder-network.dot
@@ -0,0 +1,22 @@
+digraph G {
+
+ // Main styles
+ nodesep=0.3; ranksep=0.15;
+
+ node [shape=rect, fillcolor=darkorange, color=white, style=filled, fontsize=11, fontname="arial", height=0.2];
+ edge [color=gray, arrowsize=0.5];
+
+ // Layout
+ {rank=same;input;prev_hidden}
+
+ input -> embedding;
+ embedding -> embedded;
+ embedded -> gru;
+ prev_hidden -> gru;
+ gru -> output;
+ gru -> hidden;
+
+ embedding [fillcolor=dodgerblue, fontcolor=white];
+ gru [fillcolor=dodgerblue, fontcolor=white];
+
+}
diff --git a/_static/img/seq-seq-images/encoder-network.png b/_static/img/seq-seq-images/encoder-network.png
new file mode 100755
index 00000000000..1525f1bf2c3
Binary files /dev/null and b/_static/img/seq-seq-images/encoder-network.png differ
diff --git a/_static/img/seq-seq-images/seq2seq.png b/_static/img/seq-seq-images/seq2seq.png
new file mode 100755
index 00000000000..af09c74a1f8
Binary files /dev/null and b/_static/img/seq-seq-images/seq2seq.png differ
diff --git a/_static/img/seq-seq-images/seq2seq@2x.png b/_static/img/seq-seq-images/seq2seq@2x.png
new file mode 100755
index 00000000000..a47543ab5bd
Binary files /dev/null and b/_static/img/seq-seq-images/seq2seq@2x.png differ
diff --git a/_static/img/seq-seq-images/word-encoding.png b/_static/img/seq-seq-images/word-encoding.png
new file mode 100755
index 00000000000..4d7ee1c77e5
Binary files /dev/null and b/_static/img/seq-seq-images/word-encoding.png differ
diff --git a/_static/img/seq-seq-images/word-encoding@2x.png b/_static/img/seq-seq-images/word-encoding@2x.png
new file mode 100755
index 00000000000..c400077364f
Binary files /dev/null and b/_static/img/seq-seq-images/word-encoding@2x.png differ
diff --git a/_static/img/seq2seq_flat.png b/_static/img/seq2seq_flat.png
new file mode 100755
index 00000000000..279884de2e3
Binary files /dev/null and b/_static/img/seq2seq_flat.png differ
diff --git a/_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav b/_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav
new file mode 100644
index 00000000000..3f899c9dadd
Binary files /dev/null and b/_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav differ
diff --git a/_static/img/stn/FSeq.png b/_static/img/stn/FSeq.png
new file mode 100755
index 00000000000..8bdd84acdf9
Binary files /dev/null and b/_static/img/stn/FSeq.png differ
diff --git a/_static/img/stn/Five.gif b/_static/img/stn/Five.gif
new file mode 100755
index 00000000000..5a1939933d3
Binary files /dev/null and b/_static/img/stn/Five.gif differ
diff --git a/_static/img/stn/stn-arch.png b/_static/img/stn/stn-arch.png
new file mode 100755
index 00000000000..670c99fbfdd
Binary files /dev/null and b/_static/img/stn/stn-arch.png differ
diff --git a/_static/img/stn/tr.png b/_static/img/stn/tr.png
new file mode 100755
index 00000000000..f80d2d8b093
Binary files /dev/null and b/_static/img/stn/tr.png differ
diff --git a/_static/img/tensor_illustration.png b/_static/img/tensor_illustration.png
new file mode 100755
index 00000000000..b0039c7f3f3
Binary files /dev/null and b/_static/img/tensor_illustration.png differ
diff --git a/_static/img/tensor_illustration_flat.png b/_static/img/tensor_illustration_flat.png
new file mode 100755
index 00000000000..5152b41ac9a
Binary files /dev/null and b/_static/img/tensor_illustration_flat.png differ
diff --git a/_static/img/tensorboard_figure.png b/_static/img/tensorboard_figure.png
new file mode 100644
index 00000000000..e4dd38e98da
Binary files /dev/null and b/_static/img/tensorboard_figure.png differ
diff --git a/_static/img/tensorboard_first_view.png b/_static/img/tensorboard_first_view.png
new file mode 100644
index 00000000000..702c8158a82
Binary files /dev/null and b/_static/img/tensorboard_first_view.png differ
diff --git a/_static/img/tensorboard_images.png b/_static/img/tensorboard_images.png
new file mode 100644
index 00000000000..79b7e15a7af
Binary files /dev/null and b/_static/img/tensorboard_images.png differ
diff --git a/_static/img/tensorboard_model_viz.png b/_static/img/tensorboard_model_viz.png
new file mode 100644
index 00000000000..2cd22344f15
Binary files /dev/null and b/_static/img/tensorboard_model_viz.png differ
diff --git a/_static/img/tensorboard_pr_curves.png b/_static/img/tensorboard_pr_curves.png
new file mode 100644
index 00000000000..0360187eae6
Binary files /dev/null and b/_static/img/tensorboard_pr_curves.png differ
diff --git a/_static/img/tensorboard_projector.png b/_static/img/tensorboard_projector.png
new file mode 100644
index 00000000000..f709efc32d0
Binary files /dev/null and b/_static/img/tensorboard_projector.png differ
diff --git a/_static/img/tensorboard_scalar_runs.png b/_static/img/tensorboard_scalar_runs.png
new file mode 100644
index 00000000000..f89ace713aa
Binary files /dev/null and b/_static/img/tensorboard_scalar_runs.png differ
diff --git a/_static/img/text_sentiment_ngrams_model.png b/_static/img/text_sentiment_ngrams_model.png
new file mode 100644
index 00000000000..94fdf554047
Binary files /dev/null and b/_static/img/text_sentiment_ngrams_model.png differ
diff --git a/_static/img/thumbnails/220px-KnnClassification.png b/_static/img/thumbnails/220px-KnnClassification.png
new file mode 100755
index 00000000000..fb9ee948f45
Binary files /dev/null and b/_static/img/thumbnails/220px-KnnClassification.png differ
diff --git a/_static/img/thumbnails/babel.jpg b/_static/img/thumbnails/babel.jpg
new file mode 100755
index 00000000000..f65d1509712
Binary files /dev/null and b/_static/img/thumbnails/babel.jpg differ
diff --git a/_static/img/thumbnails/captum_teaser.png b/_static/img/thumbnails/captum_teaser.png
new file mode 100644
index 00000000000..c7fcb2c093a
Binary files /dev/null and b/_static/img/thumbnails/captum_teaser.png differ
diff --git a/_static/img/thumbnails/cropped/60-min-blitz.png b/_static/img/thumbnails/cropped/60-min-blitz.png
new file mode 100644
index 00000000000..681a16d9935
Binary files /dev/null and b/_static/img/thumbnails/cropped/60-min-blitz.png differ
diff --git a/_static/img/thumbnails/cropped/Adversarial-Example-Generation.png b/_static/img/thumbnails/cropped/Adversarial-Example-Generation.png
new file mode 100644
index 00000000000..ad5014e805c
Binary files /dev/null and b/_static/img/thumbnails/cropped/Adversarial-Example-Generation.png differ
diff --git a/_static/img/thumbnails/cropped/Autograd-in-Cpp-Frontend.png b/_static/img/thumbnails/cropped/Autograd-in-Cpp-Frontend.png
new file mode 100644
index 00000000000..3aec75031ae
Binary files /dev/null and b/_static/img/thumbnails/cropped/Autograd-in-Cpp-Frontend.png differ
diff --git a/_static/img/thumbnails/cropped/Combining-Distributed-DataParallel-with-Distributed-RPC-Framework.png b/_static/img/thumbnails/cropped/Combining-Distributed-DataParallel-with-Distributed-RPC-Framework.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Combining-Distributed-DataParallel-with-Distributed-RPC-Framework.png differ
diff --git a/_static/img/thumbnails/cropped/Custom-Cpp-and-CUDA-Extensions.png b/_static/img/thumbnails/cropped/Custom-Cpp-and-CUDA-Extensions.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Custom-Cpp-and-CUDA-Extensions.png differ
diff --git a/_static/img/thumbnails/cropped/Customize-Process-Group-Backends-Using-Cpp-Extensions.png b/_static/img/thumbnails/cropped/Customize-Process-Group-Backends-Using-Cpp-Extensions.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Customize-Process-Group-Backends-Using-Cpp-Extensions.png differ
diff --git a/_static/img/thumbnails/cropped/DCGAN-Tutorial.png b/_static/img/thumbnails/cropped/DCGAN-Tutorial.png
new file mode 100644
index 00000000000..a0c89096e9a
Binary files /dev/null and b/_static/img/thumbnails/cropped/DCGAN-Tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/Deploying-PyTorch-in-Python-via-a-REST-API-with-Flask.png b/_static/img/thumbnails/cropped/Deploying-PyTorch-in-Python-via-a-REST-API-with-Flask.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Deploying-PyTorch-in-Python-via-a-REST-API-with-Flask.png differ
diff --git a/_static/img/thumbnails/cropped/Distributed-Pipeline-Parallelism-Using-RPC.png b/_static/img/thumbnails/cropped/Distributed-Pipeline-Parallelism-Using-RPC.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Distributed-Pipeline-Parallelism-Using-RPC.png differ
diff --git a/_static/img/thumbnails/cropped/Exporting-PyTorch-Models-to-ONNX-Graphs.png b/_static/img/thumbnails/cropped/Exporting-PyTorch-Models-to-ONNX-Graphs.png
new file mode 100755
index 00000000000..00156df042e
Binary files /dev/null and b/_static/img/thumbnails/cropped/Exporting-PyTorch-Models-to-ONNX-Graphs.png differ
diff --git a/_static/img/thumbnails/cropped/Extending-TorchScript-with-Custom-Cpp-Classes.png b/_static/img/thumbnails/cropped/Extending-TorchScript-with-Custom-Cpp-Classes.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Extending-TorchScript-with-Custom-Cpp-Classes.png differ
diff --git a/_static/img/thumbnails/cropped/Extending-TorchScript-with-Custom-Cpp-Operators.png b/_static/img/thumbnails/cropped/Extending-TorchScript-with-Custom-Cpp-Operators.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Extending-TorchScript-with-Custom-Cpp-Operators.png differ
diff --git a/_static/img/thumbnails/cropped/Getting Started with Distributed-RPC-Framework.png b/_static/img/thumbnails/cropped/Getting Started with Distributed-RPC-Framework.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Getting Started with Distributed-RPC-Framework.png differ
diff --git a/_static/img/thumbnails/cropped/Getting-Started-with Distributed RPC Framework.png b/_static/img/thumbnails/cropped/Getting-Started-with Distributed RPC Framework.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Getting-Started-with Distributed RPC Framework.png differ
diff --git a/_static/img/thumbnails/cropped/Getting-Started-with-DCP.png b/_static/img/thumbnails/cropped/Getting-Started-with-DCP.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Getting-Started-with-DCP.png differ
diff --git a/_static/img/thumbnails/cropped/Getting-Started-with-Distributed-Data-Parallel.png b/_static/img/thumbnails/cropped/Getting-Started-with-Distributed-Data-Parallel.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Getting-Started-with-Distributed-Data-Parallel.png differ
diff --git a/_static/img/thumbnails/cropped/Getting-Started-with-Distributed-RPC-Framework.png b/_static/img/thumbnails/cropped/Getting-Started-with-Distributed-RPC-Framework.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Getting-Started-with-Distributed-RPC-Framework.png differ
diff --git a/_static/img/thumbnails/cropped/Getting-Started-with-FSDP.png b/_static/img/thumbnails/cropped/Getting-Started-with-FSDP.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Getting-Started-with-FSDP.png differ
diff --git a/_static/img/thumbnails/cropped/Implementing-Batch-RPC-Processing-Using-Asynchronous-Executions.png b/_static/img/thumbnails/cropped/Implementing-Batch-RPC-Processing-Using-Asynchronous-Executions.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Implementing-Batch-RPC-Processing-Using-Asynchronous-Executions.png differ
diff --git a/_static/img/thumbnails/cropped/Implementing-a-Parameter-Server-Using-Distributed-RPC-Framework.png b/_static/img/thumbnails/cropped/Implementing-a-Parameter-Server-Using-Distributed-RPC-Framework.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Implementing-a-Parameter-Server-Using-Distributed-RPC-Framework.png differ
diff --git a/_static/img/thumbnails/cropped/Introduction-to-TorchScript.png b/_static/img/thumbnails/cropped/Introduction-to-TorchScript.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Introduction-to-TorchScript.png differ
diff --git a/_static/img/thumbnails/cropped/Language-Translation-with-TorchText.png b/_static/img/thumbnails/cropped/Language-Translation-with-TorchText.png
new file mode 100644
index 00000000000..9330c6cbdb5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Language-Translation-with-TorchText.png differ
diff --git a/_static/img/thumbnails/cropped/Large-Scale-Transformer-model-training-with-Tensor-Parallel.png b/_static/img/thumbnails/cropped/Large-Scale-Transformer-model-training-with-Tensor-Parallel.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Large-Scale-Transformer-model-training-with-Tensor-Parallel.png differ
diff --git a/_static/img/thumbnails/cropped/Loading-a-TorchScript-Model-in-Cpp.png b/_static/img/thumbnails/cropped/Loading-a-TorchScript-Model-in-Cpp.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Loading-a-TorchScript-Model-in-Cpp.png differ
diff --git a/_static/img/thumbnails/cropped/Model-Parallel-Best-Practices.png b/_static/img/thumbnails/cropped/Model-Parallel-Best-Practices.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Model-Parallel-Best-Practices.png differ
diff --git a/_static/img/thumbnails/cropped/NLP-From-Scratch-Classifying-Names-with-a-Character-Level-RNN.png b/_static/img/thumbnails/cropped/NLP-From-Scratch-Classifying-Names-with-a-Character-Level-RNN.png
new file mode 100644
index 00000000000..0aa02de9a53
Binary files /dev/null and b/_static/img/thumbnails/cropped/NLP-From-Scratch-Classifying-Names-with-a-Character-Level-RNN.png differ
diff --git a/_static/img/thumbnails/cropped/NLP-From-Scratch-Generating-Names-with-a-Character-Level-RNN.png b/_static/img/thumbnails/cropped/NLP-From-Scratch-Generating-Names-with-a-Character-Level-RNN.png
new file mode 100644
index 00000000000..a63d82ba4b4
Binary files /dev/null and b/_static/img/thumbnails/cropped/NLP-From-Scratch-Generating-Names-with-a-Character-Level-RNN.png differ
diff --git a/_static/img/thumbnails/cropped/NLP-From-Scratch-Translation-with-a-Sequence-to-Sequence-Network-and-Attention.png b/_static/img/thumbnails/cropped/NLP-From-Scratch-Translation-with-a-Sequence-to-Sequence-Network-and-Attention.png
new file mode 100644
index 00000000000..11d4f07c3bf
Binary files /dev/null and b/_static/img/thumbnails/cropped/NLP-From-Scratch-Translation-with-a-Sequence-to-Sequence-Network-and-Attention.png differ
diff --git a/_static/img/thumbnails/cropped/Pruning-Tutorial.png b/_static/img/thumbnails/cropped/Pruning-Tutorial.png
new file mode 100644
index 00000000000..32953c7ab19
Binary files /dev/null and b/_static/img/thumbnails/cropped/Pruning-Tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/PyTorch-Distributed-Overview.png b/_static/img/thumbnails/cropped/PyTorch-Distributed-Overview.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/PyTorch-Distributed-Overview.png differ
diff --git a/_static/img/thumbnails/cropped/Sequence-to-Sequence-Modeling-with-nnTransformer-andTorchText.png b/_static/img/thumbnails/cropped/Sequence-to-Sequence-Modeling-with-nnTransformer-andTorchText.png
new file mode 100644
index 00000000000..00c4a236f24
Binary files /dev/null and b/_static/img/thumbnails/cropped/Sequence-to-Sequence-Modeling-with-nnTransformer-andTorchText.png differ
diff --git a/_static/img/thumbnails/cropped/TIAToolbox-Tutorial.png b/_static/img/thumbnails/cropped/TIAToolbox-Tutorial.png
new file mode 100644
index 00000000000..76f2bcaf4de
Binary files /dev/null and b/_static/img/thumbnails/cropped/TIAToolbox-Tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/Text-Classification-with-TorchText.png b/_static/img/thumbnails/cropped/Text-Classification-with-TorchText.png
new file mode 100644
index 00000000000..e46aa333390
Binary files /dev/null and b/_static/img/thumbnails/cropped/Text-Classification-with-TorchText.png differ
diff --git a/_static/img/thumbnails/cropped/TorchScript-Parallelism.jpg b/_static/img/thumbnails/cropped/TorchScript-Parallelism.jpg
new file mode 100644
index 00000000000..237990a0460
Binary files /dev/null and b/_static/img/thumbnails/cropped/TorchScript-Parallelism.jpg differ
diff --git a/_static/img/thumbnails/cropped/TorchVision-Object-Detection-Finetuning-Tutorial.png b/_static/img/thumbnails/cropped/TorchVision-Object-Detection-Finetuning-Tutorial.png
new file mode 100644
index 00000000000..e79ff0d395e
Binary files /dev/null and b/_static/img/thumbnails/cropped/TorchVision-Object-Detection-Finetuning-Tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png b/_static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png differ
diff --git a/_static/img/thumbnails/cropped/Training-Transformer-models-using-Pipeline-Parallelism.png b/_static/img/thumbnails/cropped/Training-Transformer-models-using-Pipeline-Parallelism.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Training-Transformer-models-using-Pipeline-Parallelism.png differ
diff --git a/_static/img/thumbnails/cropped/Transfer-Learning-for-Computer-Vision-Tutorial.png b/_static/img/thumbnails/cropped/Transfer-Learning-for-Computer-Vision-Tutorial.png
new file mode 100644
index 00000000000..029f0ff1bea
Binary files /dev/null and b/_static/img/thumbnails/cropped/Transfer-Learning-for-Computer-Vision-Tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/Tutorials_Card_Template.psd b/_static/img/thumbnails/cropped/Tutorials_Card_Template.psd
new file mode 100644
index 00000000000..6caf48a5951
Binary files /dev/null and b/_static/img/thumbnails/cropped/Tutorials_Card_Template.psd differ
diff --git a/_static/img/thumbnails/cropped/Using-the-PyTorch-Cpp-Frontend.png b/_static/img/thumbnails/cropped/Using-the-PyTorch-Cpp-Frontend.png
new file mode 100644
index 00000000000..3aec75031ae
Binary files /dev/null and b/_static/img/thumbnails/cropped/Using-the-PyTorch-Cpp-Frontend.png differ
diff --git a/_static/img/thumbnails/cropped/Writing-Distributed-Applications-with-PyTorch.png b/_static/img/thumbnails/cropped/Writing-Distributed-Applications-with-PyTorch.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/Writing-Distributed-Applications-with-PyTorch.png differ
diff --git a/_static/img/thumbnails/cropped/advanced-PyTorch-1point0-Distributed-Trainer-with-Amazon-AWS.png b/_static/img/thumbnails/cropped/advanced-PyTorch-1point0-Distributed-Trainer-with-Amazon-AWS.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/advanced-PyTorch-1point0-Distributed-Trainer-with-Amazon-AWS.png differ
diff --git a/_static/img/thumbnails/cropped/amp.png b/_static/img/thumbnails/cropped/amp.png
new file mode 100644
index 00000000000..a6916ce5605
Binary files /dev/null and b/_static/img/thumbnails/cropped/amp.png differ
diff --git a/_static/img/thumbnails/cropped/android.png b/_static/img/thumbnails/cropped/android.png
new file mode 100644
index 00000000000..5c6079d9090
Binary files /dev/null and b/_static/img/thumbnails/cropped/android.png differ
diff --git a/_static/img/thumbnails/cropped/custom-datasets-transforms-and-dataloaders.png b/_static/img/thumbnails/cropped/custom-datasets-transforms-and-dataloaders.png
new file mode 100644
index 00000000000..5f73aa5663c
Binary files /dev/null and b/_static/img/thumbnails/cropped/custom-datasets-transforms-and-dataloaders.png differ
diff --git a/_static/img/thumbnails/cropped/defining-a-network.PNG b/_static/img/thumbnails/cropped/defining-a-network.PNG
new file mode 100644
index 00000000000..ded6a9ed583
Binary files /dev/null and b/_static/img/thumbnails/cropped/defining-a-network.PNG differ
diff --git a/_static/img/thumbnails/cropped/experimental-Channels-Last-Memory-Format-in-PyTorch.png b/_static/img/thumbnails/cropped/experimental-Channels-Last-Memory-Format-in-PyTorch.png
new file mode 100644
index 00000000000..18cbc1d0bc2
Binary files /dev/null and b/_static/img/thumbnails/cropped/experimental-Channels-Last-Memory-Format-in-PyTorch.png differ
diff --git a/_static/img/thumbnails/cropped/experimental-Introduction-to-Named-Tensors-in-PyTorch.png b/_static/img/thumbnails/cropped/experimental-Introduction-to-Named-Tensors-in-PyTorch.png
new file mode 100644
index 00000000000..d52414ec275
Binary files /dev/null and b/_static/img/thumbnails/cropped/experimental-Introduction-to-Named-Tensors-in-PyTorch.png differ
diff --git a/_static/img/thumbnails/cropped/experimental-Quantized-Transfer-Learning-for-Computer-Vision-Tutorial.png b/_static/img/thumbnails/cropped/experimental-Quantized-Transfer-Learning-for-Computer-Vision-Tutorial.png
new file mode 100644
index 00000000000..d826d8170c1
Binary files /dev/null and b/_static/img/thumbnails/cropped/experimental-Quantized-Transfer-Learning-for-Computer-Vision-Tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/experimental-Static-Quantization-with-Eager-Mode-in-PyTorch.png b/_static/img/thumbnails/cropped/experimental-Static-Quantization-with-Eager-Mode-in-PyTorch.png
new file mode 100644
index 00000000000..d826d8170c1
Binary files /dev/null and b/_static/img/thumbnails/cropped/experimental-Static-Quantization-with-Eager-Mode-in-PyTorch.png differ
diff --git a/_static/img/thumbnails/cropped/generic-pytorch-logo.png b/_static/img/thumbnails/cropped/generic-pytorch-logo.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/generic-pytorch-logo.png differ
diff --git a/_static/img/thumbnails/cropped/ios.png b/_static/img/thumbnails/cropped/ios.png
new file mode 100644
index 00000000000..8c1d4a2b04d
Binary files /dev/null and b/_static/img/thumbnails/cropped/ios.png differ
diff --git a/_static/img/thumbnails/cropped/knowledge_distillation_pytorch_logo.png b/_static/img/thumbnails/cropped/knowledge_distillation_pytorch_logo.png
new file mode 100644
index 00000000000..3ce40781542
Binary files /dev/null and b/_static/img/thumbnails/cropped/knowledge_distillation_pytorch_logo.png differ
diff --git a/_static/img/thumbnails/cropped/learning-pytorch-with-examples.png b/_static/img/thumbnails/cropped/learning-pytorch-with-examples.png
new file mode 100644
index 00000000000..b292603835b
Binary files /dev/null and b/_static/img/thumbnails/cropped/learning-pytorch-with-examples.png differ
diff --git a/_static/img/thumbnails/cropped/loading-data-in-pytorch.png b/_static/img/thumbnails/cropped/loading-data-in-pytorch.png
new file mode 100644
index 00000000000..20309e32cf5
Binary files /dev/null and b/_static/img/thumbnails/cropped/loading-data-in-pytorch.png differ
diff --git a/_static/img/thumbnails/cropped/loading-data.PNG b/_static/img/thumbnails/cropped/loading-data.PNG
new file mode 100644
index 00000000000..0cb07e34e5e
Binary files /dev/null and b/_static/img/thumbnails/cropped/loading-data.PNG differ
diff --git a/_static/img/thumbnails/cropped/model-interpretability-using-captum.png b/_static/img/thumbnails/cropped/model-interpretability-using-captum.png
new file mode 100644
index 00000000000..2e531ae27c9
Binary files /dev/null and b/_static/img/thumbnails/cropped/model-interpretability-using-captum.png differ
diff --git a/_static/img/thumbnails/cropped/parametrizations.png b/_static/img/thumbnails/cropped/parametrizations.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/parametrizations.png differ
diff --git a/_static/img/thumbnails/cropped/profile.png b/_static/img/thumbnails/cropped/profile.png
new file mode 100644
index 00000000000..372db8bbe87
Binary files /dev/null and b/_static/img/thumbnails/cropped/profile.png differ
diff --git a/_static/img/thumbnails/cropped/profiler.png b/_static/img/thumbnails/cropped/profiler.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/profiler.png differ
diff --git a/_static/img/thumbnails/cropped/pytorch-logo.png b/_static/img/thumbnails/cropped/pytorch-logo.png
new file mode 100644
index 00000000000..426a14d98f5
Binary files /dev/null and b/_static/img/thumbnails/cropped/pytorch-logo.png differ
diff --git a/_static/img/thumbnails/cropped/realtime_rpi.png b/_static/img/thumbnails/cropped/realtime_rpi.png
new file mode 100644
index 00000000000..b233f3df3a1
Binary files /dev/null and b/_static/img/thumbnails/cropped/realtime_rpi.png differ
diff --git a/_static/img/thumbnails/cropped/saving-and-loading-general-checkpoint.PNG b/_static/img/thumbnails/cropped/saving-and-loading-general-checkpoint.PNG
new file mode 100644
index 00000000000..ba351430712
Binary files /dev/null and b/_static/img/thumbnails/cropped/saving-and-loading-general-checkpoint.PNG differ
diff --git a/_static/img/thumbnails/cropped/saving-and-loading-models-across-devices.PNG b/_static/img/thumbnails/cropped/saving-and-loading-models-across-devices.PNG
new file mode 100644
index 00000000000..a1c337928a1
Binary files /dev/null and b/_static/img/thumbnails/cropped/saving-and-loading-models-across-devices.PNG differ
diff --git a/_static/img/thumbnails/cropped/saving-and-loading-models-for-inference.PNG b/_static/img/thumbnails/cropped/saving-and-loading-models-for-inference.PNG
new file mode 100644
index 00000000000..b8075559c1d
Binary files /dev/null and b/_static/img/thumbnails/cropped/saving-and-loading-models-for-inference.PNG differ
diff --git a/_static/img/thumbnails/cropped/saving-multiple-models.PNG b/_static/img/thumbnails/cropped/saving-multiple-models.PNG
new file mode 100644
index 00000000000..2917cac557a
Binary files /dev/null and b/_static/img/thumbnails/cropped/saving-multiple-models.PNG differ
diff --git a/_static/img/thumbnails/cropped/torch-nn.png b/_static/img/thumbnails/cropped/torch-nn.png
new file mode 100644
index 00000000000..44a3e8dca1d
Binary files /dev/null and b/_static/img/thumbnails/cropped/torch-nn.png differ
diff --git a/_static/img/thumbnails/cropped/torch_text_logo.png b/_static/img/thumbnails/cropped/torch_text_logo.png
new file mode 100644
index 00000000000..3fe736d60e2
Binary files /dev/null and b/_static/img/thumbnails/cropped/torch_text_logo.png differ
diff --git a/_static/img/thumbnails/cropped/torchaudio-Tutorial.png b/_static/img/thumbnails/cropped/torchaudio-Tutorial.png
new file mode 100644
index 00000000000..c49aa17c02f
Binary files /dev/null and b/_static/img/thumbnails/cropped/torchaudio-Tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/torchaudio-alignment.png b/_static/img/thumbnails/cropped/torchaudio-alignment.png
new file mode 100644
index 00000000000..d5a25f35219
Binary files /dev/null and b/_static/img/thumbnails/cropped/torchaudio-alignment.png differ
diff --git a/_static/img/thumbnails/cropped/torchaudio-asr.png b/_static/img/thumbnails/cropped/torchaudio-asr.png
new file mode 100644
index 00000000000..ff84f3ff3f1
Binary files /dev/null and b/_static/img/thumbnails/cropped/torchaudio-asr.png differ
diff --git a/_static/img/thumbnails/cropped/torchaudio-speech.png b/_static/img/thumbnails/cropped/torchaudio-speech.png
new file mode 100644
index 00000000000..c874a6bb482
Binary files /dev/null and b/_static/img/thumbnails/cropped/torchaudio-speech.png differ
diff --git a/_static/img/thumbnails/cropped/torchscript_overview.png b/_static/img/thumbnails/cropped/torchscript_overview.png
new file mode 100644
index 00000000000..63e599b1a80
Binary files /dev/null and b/_static/img/thumbnails/cropped/torchscript_overview.png differ
diff --git a/_static/img/thumbnails/cropped/understanding_leaf_vs_nonleaf.png b/_static/img/thumbnails/cropped/understanding_leaf_vs_nonleaf.png
new file mode 100644
index 00000000000..0590cf227d9
Binary files /dev/null and b/_static/img/thumbnails/cropped/understanding_leaf_vs_nonleaf.png differ
diff --git a/_static/img/thumbnails/cropped/using-flask-create-restful-api.png b/_static/img/thumbnails/cropped/using-flask-create-restful-api.png
new file mode 100644
index 00000000000..176c4de6d5b
Binary files /dev/null and b/_static/img/thumbnails/cropped/using-flask-create-restful-api.png differ
diff --git a/_static/img/thumbnails/cropped/visualizing-with-tensorboard.png b/_static/img/thumbnails/cropped/visualizing-with-tensorboard.png
new file mode 100644
index 00000000000..8fdecca65fe
Binary files /dev/null and b/_static/img/thumbnails/cropped/visualizing-with-tensorboard.png differ
diff --git a/_static/img/thumbnails/cropped/visualizing_gradients_tutorial.png b/_static/img/thumbnails/cropped/visualizing_gradients_tutorial.png
new file mode 100644
index 00000000000..6ff6d97f2e2
Binary files /dev/null and b/_static/img/thumbnails/cropped/visualizing_gradients_tutorial.png differ
diff --git a/_static/img/thumbnails/cropped/warmstarting-models.PNG b/_static/img/thumbnails/cropped/warmstarting-models.PNG
new file mode 100644
index 00000000000..385f2ab80c8
Binary files /dev/null and b/_static/img/thumbnails/cropped/warmstarting-models.PNG differ
diff --git a/_static/img/thumbnails/cropped/what-is-a-state-dict.PNG b/_static/img/thumbnails/cropped/what-is-a-state-dict.PNG
new file mode 100644
index 00000000000..b0eee89ad73
Binary files /dev/null and b/_static/img/thumbnails/cropped/what-is-a-state-dict.PNG differ
diff --git a/_static/img/thumbnails/cropped/zeroing-out-gradients.PNG b/_static/img/thumbnails/cropped/zeroing-out-gradients.PNG
new file mode 100644
index 00000000000..0f21b230abf
Binary files /dev/null and b/_static/img/thumbnails/cropped/zeroing-out-gradients.PNG differ
diff --git a/_static/img/thumbnails/custom_dataset.png b/_static/img/thumbnails/custom_dataset.png
new file mode 100644
index 00000000000..59a8993bc4a
Binary files /dev/null and b/_static/img/thumbnails/custom_dataset.png differ
diff --git a/_static/img/thumbnails/default.png b/_static/img/thumbnails/default.png
new file mode 100755
index 00000000000..cbc8e0f3567
Binary files /dev/null and b/_static/img/thumbnails/default.png differ
diff --git a/_static/img/thumbnails/defining_a_network.png b/_static/img/thumbnails/defining_a_network.png
new file mode 100644
index 00000000000..f0c0a940713
Binary files /dev/null and b/_static/img/thumbnails/defining_a_network.png differ
diff --git a/_static/img/thumbnails/examples.png b/_static/img/thumbnails/examples.png
new file mode 100755
index 00000000000..62d3ff1784d
Binary files /dev/null and b/_static/img/thumbnails/examples.png differ
diff --git a/_static/img/thumbnails/eye.png b/_static/img/thumbnails/eye.png
new file mode 100755
index 00000000000..6feec8d9558
Binary files /dev/null and b/_static/img/thumbnails/eye.png differ
diff --git a/_static/img/thumbnails/floppy.png b/_static/img/thumbnails/floppy.png
new file mode 100755
index 00000000000..ba3ad2c3533
Binary files /dev/null and b/_static/img/thumbnails/floppy.png differ
diff --git a/_static/img/thumbnails/german_to_english_translation.png b/_static/img/thumbnails/german_to_english_translation.png
new file mode 100644
index 00000000000..a3560c574a6
Binary files /dev/null and b/_static/img/thumbnails/german_to_english_translation.png differ
diff --git a/_static/img/thumbnails/landmarked_face2.png b/_static/img/thumbnails/landmarked_face2.png
new file mode 100755
index 00000000000..2faa58b37f2
Binary files /dev/null and b/_static/img/thumbnails/landmarked_face2.png differ
diff --git a/_static/img/thumbnails/pixelated-cat.png b/_static/img/thumbnails/pixelated-cat.png
new file mode 100644
index 00000000000..c3d527f0691
Binary files /dev/null and b/_static/img/thumbnails/pixelated-cat.png differ
diff --git a/_static/img/thumbnails/pytorch-logo-flat.png b/_static/img/thumbnails/pytorch-logo-flat.png
new file mode 100755
index 00000000000..d9ace0e0e74
Binary files /dev/null and b/_static/img/thumbnails/pytorch-logo-flat.png differ
diff --git a/_static/img/thumbnails/pytorch_tensorboard.png b/_static/img/thumbnails/pytorch_tensorboard.png
new file mode 100644
index 00000000000..2dce6a6e268
Binary files /dev/null and b/_static/img/thumbnails/pytorch_tensorboard.png differ
diff --git a/_static/img/thumbnails/sphx_glr_transfer_learning_tutorial_001.png b/_static/img/thumbnails/sphx_glr_transfer_learning_tutorial_001.png
new file mode 100755
index 00000000000..42372d51c89
Binary files /dev/null and b/_static/img/thumbnails/sphx_glr_transfer_learning_tutorial_001.png differ
diff --git a/_static/img/thumbnails/tensorboard_dev.png b/_static/img/thumbnails/tensorboard_dev.png
new file mode 100644
index 00000000000..056839a6359
Binary files /dev/null and b/_static/img/thumbnails/tensorboard_dev.png differ
diff --git a/_static/img/thumbnails/tensorboard_scalars.png b/_static/img/thumbnails/tensorboard_scalars.png
new file mode 100644
index 00000000000..ab6734ab3e9
Binary files /dev/null and b/_static/img/thumbnails/tensorboard_scalars.png differ
diff --git a/_static/img/thumbnails/torch-logo.png b/_static/img/thumbnails/torch-logo.png
new file mode 100755
index 00000000000..fbadc30fdac
Binary files /dev/null and b/_static/img/thumbnails/torch-logo.png differ
diff --git a/_static/img/thumbnails/torchrec.png b/_static/img/thumbnails/torchrec.png
new file mode 100644
index 00000000000..1304b56873e
Binary files /dev/null and b/_static/img/thumbnails/torchrec.png differ
diff --git a/_static/img/thumbnails/torchtext.png b/_static/img/thumbnails/torchtext.png
new file mode 100644
index 00000000000..ee4285aef34
Binary files /dev/null and b/_static/img/thumbnails/torchtext.png differ
diff --git a/_static/img/thumbnails/tv-img.png b/_static/img/thumbnails/tv-img.png
new file mode 100644
index 00000000000..c2b2417f3f8
Binary files /dev/null and b/_static/img/thumbnails/tv-img.png differ
diff --git a/images/torch-nn-vs-pytorch-nn.png b/_static/img/torch-nn-vs-pytorch-nn.png
old mode 100644
new mode 100755
similarity index 100%
rename from images/torch-nn-vs-pytorch-nn.png
rename to _static/img/torch-nn-vs-pytorch-nn.png
diff --git a/_static/img/torch.nn.png b/_static/img/torch.nn.png
new file mode 100644
index 00000000000..3af3b087fa5
Binary files /dev/null and b/_static/img/torch.nn.png differ
diff --git a/_static/img/torchscript.png b/_static/img/torchscript.png
new file mode 100644
index 00000000000..b748d45d231
Binary files /dev/null and b/_static/img/torchscript.png differ
diff --git a/_static/img/torchscript_to_cpp.png b/_static/img/torchscript_to_cpp.png
new file mode 100644
index 00000000000..579d65b00d4
Binary files /dev/null and b/_static/img/torchscript_to_cpp.png differ
diff --git a/_static/img/torchserve-ipex-images-2/1.png b/_static/img/torchserve-ipex-images-2/1.png
new file mode 100644
index 00000000000..4a9f488236b
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/1.png differ
diff --git a/_static/img/torchserve-ipex-images-2/10.png b/_static/img/torchserve-ipex-images-2/10.png
new file mode 100644
index 00000000000..d56f34600d8
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/10.png differ
diff --git a/_static/img/torchserve-ipex-images-2/11.png b/_static/img/torchserve-ipex-images-2/11.png
new file mode 100644
index 00000000000..8ebbcc03d51
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/11.png differ
diff --git a/_static/img/torchserve-ipex-images-2/12.png b/_static/img/torchserve-ipex-images-2/12.png
new file mode 100644
index 00000000000..23c4794ae36
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/12.png differ
diff --git a/_static/img/torchserve-ipex-images-2/13.png b/_static/img/torchserve-ipex-images-2/13.png
new file mode 100644
index 00000000000..4e1dc6e1a03
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/13.png differ
diff --git a/_static/img/torchserve-ipex-images-2/14.png b/_static/img/torchserve-ipex-images-2/14.png
new file mode 100644
index 00000000000..701399e9d9b
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/14.png differ
diff --git a/_static/img/torchserve-ipex-images-2/15.png b/_static/img/torchserve-ipex-images-2/15.png
new file mode 100644
index 00000000000..b345a9d0d8c
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/15.png differ
diff --git a/_static/img/torchserve-ipex-images-2/16.png b/_static/img/torchserve-ipex-images-2/16.png
new file mode 100644
index 00000000000..39b5d6afb9c
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/16.png differ
diff --git a/_static/img/torchserve-ipex-images-2/17.png b/_static/img/torchserve-ipex-images-2/17.png
new file mode 100644
index 00000000000..bb7359bcbe6
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/17.png differ
diff --git a/_static/img/torchserve-ipex-images-2/18.png b/_static/img/torchserve-ipex-images-2/18.png
new file mode 100644
index 00000000000..30ad817a561
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/18.png differ
diff --git a/_static/img/torchserve-ipex-images-2/19.png b/_static/img/torchserve-ipex-images-2/19.png
new file mode 100644
index 00000000000..353bfb897a1
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/19.png differ
diff --git a/_static/img/torchserve-ipex-images-2/2.png b/_static/img/torchserve-ipex-images-2/2.png
new file mode 100644
index 00000000000..d7d351a3e74
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/2.png differ
diff --git a/_static/img/torchserve-ipex-images-2/20.png b/_static/img/torchserve-ipex-images-2/20.png
new file mode 100644
index 00000000000..aa94ff57dce
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/20.png differ
diff --git a/_static/img/torchserve-ipex-images-2/21.png b/_static/img/torchserve-ipex-images-2/21.png
new file mode 100644
index 00000000000..c714adc1453
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/21.png differ
diff --git a/_static/img/torchserve-ipex-images-2/22.png b/_static/img/torchserve-ipex-images-2/22.png
new file mode 100644
index 00000000000..fa7ae84c702
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/22.png differ
diff --git a/_static/img/torchserve-ipex-images-2/23.png b/_static/img/torchserve-ipex-images-2/23.png
new file mode 100644
index 00000000000..fd8a1bf8389
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/23.png differ
diff --git a/_static/img/torchserve-ipex-images-2/24.png b/_static/img/torchserve-ipex-images-2/24.png
new file mode 100644
index 00000000000..6ba858f98f0
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/24.png differ
diff --git a/_static/img/torchserve-ipex-images-2/3.png b/_static/img/torchserve-ipex-images-2/3.png
new file mode 100644
index 00000000000..6ae485bd132
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/3.png differ
diff --git a/_static/img/torchserve-ipex-images-2/4.png b/_static/img/torchserve-ipex-images-2/4.png
new file mode 100644
index 00000000000..b0fa5e68133
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/4.png differ
diff --git a/_static/img/torchserve-ipex-images-2/5.png b/_static/img/torchserve-ipex-images-2/5.png
new file mode 100644
index 00000000000..25adc177ad1
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/5.png differ
diff --git a/_static/img/torchserve-ipex-images-2/6.png b/_static/img/torchserve-ipex-images-2/6.png
new file mode 100644
index 00000000000..739d3b388d3
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/6.png differ
diff --git a/_static/img/torchserve-ipex-images-2/7.png b/_static/img/torchserve-ipex-images-2/7.png
new file mode 100644
index 00000000000..77765616d65
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/7.png differ
diff --git a/_static/img/torchserve-ipex-images-2/8.png b/_static/img/torchserve-ipex-images-2/8.png
new file mode 100644
index 00000000000..b731676cc21
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/8.png differ
diff --git a/_static/img/torchserve-ipex-images-2/9.png b/_static/img/torchserve-ipex-images-2/9.png
new file mode 100644
index 00000000000..9155201ab3c
Binary files /dev/null and b/_static/img/torchserve-ipex-images-2/9.png differ
diff --git a/_static/img/torchserve-ipex-images/1.png b/_static/img/torchserve-ipex-images/1.png
new file mode 100644
index 00000000000..fc8748b22a5
Binary files /dev/null and b/_static/img/torchserve-ipex-images/1.png differ
diff --git a/_static/img/torchserve-ipex-images/10.png b/_static/img/torchserve-ipex-images/10.png
new file mode 100644
index 00000000000..833a1bb7cf9
Binary files /dev/null and b/_static/img/torchserve-ipex-images/10.png differ
diff --git a/_static/img/torchserve-ipex-images/11.gif b/_static/img/torchserve-ipex-images/11.gif
new file mode 100644
index 00000000000..1c1a2644e8e
Binary files /dev/null and b/_static/img/torchserve-ipex-images/11.gif differ
diff --git a/_static/img/torchserve-ipex-images/12.png b/_static/img/torchserve-ipex-images/12.png
new file mode 100644
index 00000000000..b55968fd705
Binary files /dev/null and b/_static/img/torchserve-ipex-images/12.png differ
diff --git a/_static/img/torchserve-ipex-images/13.png b/_static/img/torchserve-ipex-images/13.png
new file mode 100644
index 00000000000..de9c08814e6
Binary files /dev/null and b/_static/img/torchserve-ipex-images/13.png differ
diff --git a/_static/img/torchserve-ipex-images/14.png b/_static/img/torchserve-ipex-images/14.png
new file mode 100644
index 00000000000..4d776d81647
Binary files /dev/null and b/_static/img/torchserve-ipex-images/14.png differ
diff --git a/_static/img/torchserve-ipex-images/15.png b/_static/img/torchserve-ipex-images/15.png
new file mode 100644
index 00000000000..513ccf8e053
Binary files /dev/null and b/_static/img/torchserve-ipex-images/15.png differ
diff --git a/_static/img/torchserve-ipex-images/16.png b/_static/img/torchserve-ipex-images/16.png
new file mode 100644
index 00000000000..3670d0a1dc4
Binary files /dev/null and b/_static/img/torchserve-ipex-images/16.png differ
diff --git a/_static/img/torchserve-ipex-images/17.png b/_static/img/torchserve-ipex-images/17.png
new file mode 100644
index 00000000000..5ab17373c95
Binary files /dev/null and b/_static/img/torchserve-ipex-images/17.png differ
diff --git a/_static/img/torchserve-ipex-images/18.png b/_static/img/torchserve-ipex-images/18.png
new file mode 100644
index 00000000000..50304884d3e
Binary files /dev/null and b/_static/img/torchserve-ipex-images/18.png differ
diff --git a/_static/img/torchserve-ipex-images/19.png b/_static/img/torchserve-ipex-images/19.png
new file mode 100644
index 00000000000..b123480530e
Binary files /dev/null and b/_static/img/torchserve-ipex-images/19.png differ
diff --git a/_static/img/torchserve-ipex-images/1_.png b/_static/img/torchserve-ipex-images/1_.png
new file mode 100644
index 00000000000..fc8748b22a5
Binary files /dev/null and b/_static/img/torchserve-ipex-images/1_.png differ
diff --git a/_static/img/torchserve-ipex-images/2.png b/_static/img/torchserve-ipex-images/2.png
new file mode 100644
index 00000000000..27633f25bcb
Binary files /dev/null and b/_static/img/torchserve-ipex-images/2.png differ
diff --git a/_static/img/torchserve-ipex-images/20.gif b/_static/img/torchserve-ipex-images/20.gif
new file mode 100644
index 00000000000..ba8e9e95315
Binary files /dev/null and b/_static/img/torchserve-ipex-images/20.gif differ
diff --git a/_static/img/torchserve-ipex-images/21.png b/_static/img/torchserve-ipex-images/21.png
new file mode 100644
index 00000000000..04b3ca622bf
Binary files /dev/null and b/_static/img/torchserve-ipex-images/21.png differ
diff --git a/_static/img/torchserve-ipex-images/22.png b/_static/img/torchserve-ipex-images/22.png
new file mode 100644
index 00000000000..cbb2c269a90
Binary files /dev/null and b/_static/img/torchserve-ipex-images/22.png differ
diff --git a/_static/img/torchserve-ipex-images/23.png b/_static/img/torchserve-ipex-images/23.png
new file mode 100644
index 00000000000..c9bc44463f3
Binary files /dev/null and b/_static/img/torchserve-ipex-images/23.png differ
diff --git a/_static/img/torchserve-ipex-images/24.png b/_static/img/torchserve-ipex-images/24.png
new file mode 100644
index 00000000000..8b5718c30f3
Binary files /dev/null and b/_static/img/torchserve-ipex-images/24.png differ
diff --git a/_static/img/torchserve-ipex-images/25.png b/_static/img/torchserve-ipex-images/25.png
new file mode 100644
index 00000000000..4de920e632b
Binary files /dev/null and b/_static/img/torchserve-ipex-images/25.png differ
diff --git a/_static/img/torchserve-ipex-images/26.gif b/_static/img/torchserve-ipex-images/26.gif
new file mode 100644
index 00000000000..60a5a64ad15
Binary files /dev/null and b/_static/img/torchserve-ipex-images/26.gif differ
diff --git a/_static/img/torchserve-ipex-images/27.png b/_static/img/torchserve-ipex-images/27.png
new file mode 100644
index 00000000000..c7e766155f5
Binary files /dev/null and b/_static/img/torchserve-ipex-images/27.png differ
diff --git a/_static/img/torchserve-ipex-images/28.png b/_static/img/torchserve-ipex-images/28.png
new file mode 100644
index 00000000000..b7056c4c4ac
Binary files /dev/null and b/_static/img/torchserve-ipex-images/28.png differ
diff --git a/_static/img/torchserve-ipex-images/29.png b/_static/img/torchserve-ipex-images/29.png
new file mode 100644
index 00000000000..9dcd8735111
Binary files /dev/null and b/_static/img/torchserve-ipex-images/29.png differ
diff --git a/_static/img/torchserve-ipex-images/3.png b/_static/img/torchserve-ipex-images/3.png
new file mode 100644
index 00000000000..2309071571c
Binary files /dev/null and b/_static/img/torchserve-ipex-images/3.png differ
diff --git a/_static/img/torchserve-ipex-images/30.png b/_static/img/torchserve-ipex-images/30.png
new file mode 100644
index 00000000000..96b07ec7205
Binary files /dev/null and b/_static/img/torchserve-ipex-images/30.png differ
diff --git a/_static/img/torchserve-ipex-images/31.png b/_static/img/torchserve-ipex-images/31.png
new file mode 100644
index 00000000000..601b63e511d
Binary files /dev/null and b/_static/img/torchserve-ipex-images/31.png differ
diff --git a/_static/img/torchserve-ipex-images/4.png b/_static/img/torchserve-ipex-images/4.png
new file mode 100644
index 00000000000..f12d8c7cc40
Binary files /dev/null and b/_static/img/torchserve-ipex-images/4.png differ
diff --git a/_static/img/torchserve-ipex-images/5.png b/_static/img/torchserve-ipex-images/5.png
new file mode 100644
index 00000000000..55e05e5e53c
Binary files /dev/null and b/_static/img/torchserve-ipex-images/5.png differ
diff --git a/_static/img/torchserve-ipex-images/6.png b/_static/img/torchserve-ipex-images/6.png
new file mode 100644
index 00000000000..59a028f94b9
Binary files /dev/null and b/_static/img/torchserve-ipex-images/6.png differ
diff --git a/_static/img/torchserve-ipex-images/7.png b/_static/img/torchserve-ipex-images/7.png
new file mode 100644
index 00000000000..5739cb4f53a
Binary files /dev/null and b/_static/img/torchserve-ipex-images/7.png differ
diff --git a/_static/img/torchserve-ipex-images/8.png b/_static/img/torchserve-ipex-images/8.png
new file mode 100644
index 00000000000..1e6531b6cab
Binary files /dev/null and b/_static/img/torchserve-ipex-images/8.png differ
diff --git a/_static/img/torchserve-ipex-images/9.gif b/_static/img/torchserve-ipex-images/9.gif
new file mode 100644
index 00000000000..682e2f3425e
Binary files /dev/null and b/_static/img/torchserve-ipex-images/9.gif differ
diff --git a/_static/img/trace_img.png b/_static/img/trace_img.png
new file mode 100644
index 00000000000..8c540ceb519
Binary files /dev/null and b/_static/img/trace_img.png differ
diff --git a/_static/img/trace_xpu_img.png b/_static/img/trace_xpu_img.png
new file mode 100644
index 00000000000..2eca0a78cb6
Binary files /dev/null and b/_static/img/trace_xpu_img.png differ
diff --git a/_static/img/transformer_architecture.jpg b/_static/img/transformer_architecture.jpg
new file mode 100644
index 00000000000..4188fae7c85
Binary files /dev/null and b/_static/img/transformer_architecture.jpg differ
diff --git a/_static/img/transformer_input_target.png b/_static/img/transformer_input_target.png
new file mode 100644
index 00000000000..02e87174762
Binary files /dev/null and b/_static/img/transformer_input_target.png differ
diff --git a/_static/img/tts_pipeline.png b/_static/img/tts_pipeline.png
new file mode 100644
index 00000000000..5dc37ae9ddd
Binary files /dev/null and b/_static/img/tts_pipeline.png differ
diff --git a/_static/img/tv_tutorial/tv_image03.png b/_static/img/tv_tutorial/tv_image03.png
new file mode 100644
index 00000000000..54878b15dde
Binary files /dev/null and b/_static/img/tv_tutorial/tv_image03.png differ
diff --git a/_static/img/tv_tutorial/tv_image04.png b/_static/img/tv_tutorial/tv_image04.png
new file mode 100644
index 00000000000..229bf711329
Binary files /dev/null and b/_static/img/tv_tutorial/tv_image04.png differ
diff --git a/_static/img/understanding_leaf_vs_nonleaf/comp-graph-1.png b/_static/img/understanding_leaf_vs_nonleaf/comp-graph-1.png
new file mode 100644
index 00000000000..1fa3d80d339
Binary files /dev/null and b/_static/img/understanding_leaf_vs_nonleaf/comp-graph-1.png differ
diff --git a/_static/img/understanding_leaf_vs_nonleaf/comp-graph-2.png b/_static/img/understanding_leaf_vs_nonleaf/comp-graph-2.png
new file mode 100644
index 00000000000..3f76deab3bf
Binary files /dev/null and b/_static/img/understanding_leaf_vs_nonleaf/comp-graph-2.png differ
diff --git a/_static/img/usb_semisup_learn/code.png b/_static/img/usb_semisup_learn/code.png
new file mode 100644
index 00000000000..fdc7b798a37
Binary files /dev/null and b/_static/img/usb_semisup_learn/code.png differ
diff --git a/_static/minus.png b/_static/minus.png
new file mode 100755
index 00000000000..d96755fdaf8
Binary files /dev/null and b/_static/minus.png differ
diff --git a/_static/mnist.pkl.gz b/_static/mnist.pkl.gz
new file mode 100644
index 00000000000..6a739549cc6
Binary files /dev/null and b/_static/mnist.pkl.gz differ
diff --git a/_static/no_image.png b/_static/no_image.png
new file mode 100755
index 00000000000..8c2d48d5d3f
Binary files /dev/null and b/_static/no_image.png differ
diff --git a/_static/pencil-16.png b/_static/pencil-16.png
new file mode 100644
index 00000000000..6a4f1cf688e
Binary files /dev/null and b/_static/pencil-16.png differ
diff --git a/_static/plus.png b/_static/plus.png
new file mode 100755
index 00000000000..7107cec93a9
Binary files /dev/null and b/_static/plus.png differ
diff --git a/_static/pygments.css b/_static/pygments.css
new file mode 100755
index 00000000000..20c4814dcf0
--- /dev/null
+++ b/_static/pygments.css
@@ -0,0 +1,69 @@
+.highlight .hll { background-color: #ffffcc }
+.highlight { background: #eeffcc; }
+.highlight .c { color: #408090; font-style: italic } /* Comment */
+.highlight .err { border: 1px solid #FF0000 } /* Error */
+.highlight .k { color: #007020; font-weight: bold } /* Keyword */
+.highlight .o { color: #666666 } /* Operator */
+.highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
+.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
+.highlight .cp { color: #007020 } /* Comment.Preproc */
+.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
+.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
+.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
+.highlight .gd { color: #A00000 } /* Generic.Deleted */
+.highlight .ge { font-style: italic } /* Generic.Emph */
+.highlight .gr { color: #FF0000 } /* Generic.Error */
+.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.highlight .gi { color: #00A000 } /* Generic.Inserted */
+.highlight .go { color: #333333 } /* Generic.Output */
+.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
+.highlight .gs { font-weight: bold } /* Generic.Strong */
+.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.highlight .gt { color: #0044DD } /* Generic.Traceback */
+.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
+.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
+.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
+.highlight .kp { color: #007020 } /* Keyword.Pseudo */
+.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
+.highlight .kt { color: #902000 } /* Keyword.Type */
+.highlight .m { color: #208050 } /* Literal.Number */
+.highlight .s { color: #4070a0 } /* Literal.String */
+.highlight .na { color: #4070a0 } /* Name.Attribute */
+.highlight .nb { color: #007020 } /* Name.Builtin */
+.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
+.highlight .no { color: #60add5 } /* Name.Constant */
+.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
+.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
+.highlight .ne { color: #007020 } /* Name.Exception */
+.highlight .nf { color: #06287e } /* Name.Function */
+.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
+.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
+.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
+.highlight .nv { color: #bb60d5 } /* Name.Variable */
+.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
+.highlight .w { color: #bbbbbb } /* Text.Whitespace */
+.highlight .mb { color: #208050 } /* Literal.Number.Bin */
+.highlight .mf { color: #208050 } /* Literal.Number.Float */
+.highlight .mh { color: #208050 } /* Literal.Number.Hex */
+.highlight .mi { color: #208050 } /* Literal.Number.Integer */
+.highlight .mo { color: #208050 } /* Literal.Number.Oct */
+.highlight .sa { color: #4070a0 } /* Literal.String.Affix */
+.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
+.highlight .sc { color: #4070a0 } /* Literal.String.Char */
+.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
+.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
+.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
+.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
+.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
+.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
+.highlight .sx { color: #c65d09 } /* Literal.String.Other */
+.highlight .sr { color: #235388 } /* Literal.String.Regex */
+.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
+.highlight .ss { color: #517918 } /* Literal.String.Symbol */
+.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
+.highlight .fm { color: #06287e } /* Name.Function.Magic */
+.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
+.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
+.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
+.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
+.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
\ No newline at end of file
diff --git a/_static/pytorch-logo-dark.svg b/_static/pytorch-logo-dark.svg
new file mode 100755
index 00000000000..5e530003858
--- /dev/null
+++ b/_static/pytorch-logo-dark.svg
@@ -0,0 +1,33 @@
+
+
+
diff --git a/_static/up-pressed.png b/_static/up-pressed.png
new file mode 100755
index 00000000000..acee3b68efb
Binary files /dev/null and b/_static/up-pressed.png differ
diff --git a/_static/up.png b/_static/up.png
new file mode 100755
index 00000000000..2a940a7da7c
Binary files /dev/null and b/_static/up.png differ
diff --git a/advanced_source/ONNXLive.rst b/advanced_source/ONNXLive.rst
new file mode 100644
index 00000000000..7177522c968
--- /dev/null
+++ b/advanced_source/ONNXLive.rst
@@ -0,0 +1,12 @@
+
+ONNX Live Tutorial
+==================
+
+This tutorial has been deprecated.
+
+Redirecting in 3 seconds...
+
+
+.. raw:: html
+
+
diff --git a/advanced_source/README.txt b/advanced_source/README.txt
new file mode 100644
index 00000000000..56f01688089
--- /dev/null
+++ b/advanced_source/README.txt
@@ -0,0 +1,10 @@
+Advanced Tutorials
+------------------
+
+1. neural_style_tutorial.py
+ Neural Transfer with PyTorch
+ https://pytorch.org/tutorials/advanced/neural_style_tutorial.html
+
+2. numpy_extensions_tutorial.py
+ Creating Extensions Using numpy and scipy
+ https://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html
diff --git a/advanced_source/coding_ddpg.py b/advanced_source/coding_ddpg.py
new file mode 100644
index 00000000000..90ea4565dab
--- /dev/null
+++ b/advanced_source/coding_ddpg.py
@@ -0,0 +1,1220 @@
+# -*- coding: utf-8 -*-
+"""
+TorchRL objectives: Coding a DDPG loss
+======================================
+**Author**: `Vincent Moens `_
+
+"""
+
+##############################################################################
+# Overview
+# --------
+#
+# TorchRL separates the training of RL algorithms into various pieces that will be
+# assembled in your training script: the environment, the data collection and
+# storage, the model and, finally, the loss function.
+#
+# TorchRL losses (or "objectives") are stateful objects that contain the
+# trainable parameters (policy and value models).
+# This tutorial will guide you through the steps to code a loss from the ground up
+# using TorchRL.
+#
+# To this end, we will be focusing on DDPG, which is a relatively straightforward
+# algorithm to code.
+# `Deep Deterministic Policy Gradient `_ (DDPG)
+# is a simple continuous control algorithm. It consists of learning a
+# parametric value function for an action-observation pair, and
+# then learning a policy that outputs actions that maximize this value
+# function given a certain observation.
+#
+# What you will learn:
+#
+# - how to write a loss module and customize its value estimator;
+# - how to build an environment in TorchRL, including transforms
+# (for example, data normalization) and parallel execution;
+# - how to design a policy and value network;
+# - how to collect data from your environment efficiently and store them
+# in a replay buffer;
+# - how to store trajectories (and not transitions) in your replay buffer;
+# - how to evaluate your model.
+#
+# Prerequisites
+# ~~~~~~~~~~~~~
+#
+# This tutorial assumes that you have completed the
+# `PPO tutorial `_ which gives
+# an overview of the TorchRL components and dependencies, such as
+# :class:`tensordict.TensorDict` and :class:`tensordict.nn.TensorDictModules`,
+# although it should be
+# sufficiently transparent to be understood without a deep understanding of
+# these classes.
+#
+# .. note::
+# We do not aim to give a SOTA implementation of the algorithm, but rather
+# to provide a high-level illustration of TorchRL's loss implementations
+# and of the library features that are used in the context of
+# this algorithm.
+#
+# Imports and setup
+# -----------------
+#
+# .. code-block:: bash
+#
+# %%bash
+# pip3 install torchrl mujoco glfw
+
+# sphinx_gallery_start_ignore
+import warnings
+
+warnings.filterwarnings("ignore")
+from torch import multiprocessing
+
+# TorchRL prefers the spawn start method, which restricts the creation of
+# ``~torchrl.envs.ParallelEnv`` to the ``__main__`` guard. For ease of reading, the code
+# below switches to fork, which is also the default start method in Google's Colaboratory.
+try:
+ multiprocessing.set_start_method("fork")
+except RuntimeError:
+ pass
+
+# sphinx_gallery_end_ignore
+
+
+import torch
+import tqdm
+
+
+###############################################################################
+# We will execute the policy on CUDA if available
+is_fork = multiprocessing.get_start_method() == "fork"
+device = (
+ torch.device(0)
+ if torch.cuda.is_available() and not is_fork
+ else torch.device("cpu")
+)
+collector_device = torch.device("cpu") # Change the device to ``cuda`` to use CUDA
+
+###############################################################################
+# TorchRL :class:`~torchrl.objectives.LossModule`
+# -----------------------------------------------
+#
+# TorchRL provides a series of losses to use in your training scripts.
+# The aim is to have losses that are easily reusable/swappable and that have
+# a simple signature.
+#
+# The main characteristics of TorchRL losses are:
+#
+# - They are stateful objects: they contain a copy of the trainable parameters
+# such that ``loss_module.parameters()`` gives whatever is needed to train the
+# algorithm.
+# - They follow the ``TensorDict`` convention: the :meth:`torch.nn.Module.forward`
+# method will receive a TensorDict as input that contains all the necessary
+# information to return a loss value.
+#
+# >>> data = replay_buffer.sample()
+# >>> loss_dict = loss_module(data)
+#
+# - They output a :class:`tensordict.TensorDict` instance with the loss values
+# written under a ``"loss_<smth>"`` entry, where ``smth`` is a string describing the
+# loss. Additional keys in the ``TensorDict`` may be useful metrics to log during
+# training.
+#
+# .. note::
+# The reason we return independent losses is to let the user, for instance, use a
+# different optimizer for different sets of parameters. Summing the losses
+# can simply be done via
+#
+# >>> loss_val = sum(loss for key, loss in loss_dict.items() if key.startswith("loss_"))
+#
+# The ``__init__`` method
+# ~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The parent class of all losses is :class:`~torchrl.objectives.LossModule`.
+# As many other components of the library, its :meth:`~torchrl.objectives.LossModule.forward` method expects
+# as input a :class:`tensordict.TensorDict` instance sampled from an experience
+# replay buffer, or any similar data structure. Using this format makes it
+# possible to re-use the module across
+# modalities, or in complex settings where the model needs to read multiple
+# entries for instance. In other words, it allows us to code a loss module that
+# is oblivious to the data type that is being given to it and that focuses on
+# running the elementary steps of the loss function and only those.
+#
+# To keep the tutorial as didactic as we can, we'll be displaying each method
+# of the class independently and we'll be populating the class at a later
+# stage.
+#
+# Let us start with the :meth:`~torchrl.objectives.LossModule.__init__`
+# method. DDPG aims at solving a control task with a simple strategy:
+# training a policy to output actions that maximize the value predicted by
+# a value network. Hence, our loss module needs to receive two networks in its
+# constructor: an actor and a value network. We expect both of these to be
+# TensorDict-compatible objects, such as
+# :class:`tensordict.nn.TensorDictModule`.
+# Our loss function will need to compute a target value and fit the value
+# network to this, and generate an action and fit the policy such that its
+# value estimate is maximized.
+#
+# The crucial step of the :meth:`LossModule.__init__` method is the call to
+# :meth:`~torchrl.objectives.LossModule.convert_to_functional`. This method will extract
+# the parameters from the module and convert it to a functional module.
+# Strictly speaking, this is not necessary and one may perfectly code all
+# the losses without it. However, we encourage its usage for the following
+# reason.
+#
+# The reason TorchRL does this is that RL algorithms often execute the same
+# model with different sets of parameters, called "trainable" and "target"
+# parameters.
+# The "trainable" parameters are those that the optimizer needs to fit. The
+# "target" parameters are usually a copy of the former's with some time lag
+# (absolute or diluted through a moving average).
+# These target parameters are used to compute the value associated with the
+# next observation. One of the advantages of using a set of target parameters
+# for the value model that does not exactly match the current configuration is
+# that it provides a pessimistic bound on the value function being computed.
+# Pay attention to the ``create_target_params`` keyword argument below: this
+# argument tells the :meth:`~torchrl.objectives.LossModule.convert_to_functional`
+# method to create a set of target parameters in the loss module to be used
+# for target value computation. If this is set to ``False`` (see the actor network
+# for instance) the ``target_actor_network_params`` attribute will still be
+# accessible but this will just return a **detached** version of the
+# actor parameters.
+#
+# Later, we will see how the target parameters should be updated in TorchRL.
+#
+
+from tensordict.nn import TensorDictModule, TensorDictSequential
+
+
+def _init(
+ self,
+ actor_network: TensorDictModule,
+ value_network: TensorDictModule,
+) -> None:
+ super(type(self), self).__init__()
+
+ self.convert_to_functional(
+ actor_network,
+ "actor_network",
+ create_target_params=True,
+ )
+ self.convert_to_functional(
+ value_network,
+ "value_network",
+ create_target_params=True,
+ compare_against=list(actor_network.parameters()),
+ )
+
+ self.actor_in_keys = actor_network.in_keys
+
+ # Since the value we'll be using is based on the actor and value network,
+ # we put them together in a single actor-critic container.
+ actor_critic = ActorCriticWrapper(actor_network, value_network)
+ self.actor_critic = actor_critic
+ self.loss_function = "l2"
+
+
+###############################################################################
+# The value estimator loss method
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In many RL algorithms, the value network (or Q-value network) is trained based
+# on an empirical value estimate. This can be bootstrapped (TD(0), low
+# variance, high bias), meaning
+# that the target value is obtained using the next reward and nothing else, or
+# a Monte-Carlo estimate can be obtained (TD(1)) in which case the whole
+# sequence of upcoming rewards will be used (high variance, low bias). An
+# intermediate estimator (TD(:math:`\lambda`)) can also be used to compromise
+# bias and variance.
+# TorchRL makes it easy to use one or the other estimator via the
+# :class:`~torchrl.objectives.utils.ValueEstimators` Enum class, which contains
+# pointers to all the value estimators implemented. Let us define the default
+# value function here. We will take the simplest version (TD(0)), and show later
+# on how this can be changed.
+
+from torchrl.objectives.utils import ValueEstimators
+
+default_value_estimator = ValueEstimators.TD0
+
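+###############################################################################
+# As a refresher, the snippet below spells out the quantity a TD(0) estimator
+# bootstraps from, written with plain tensors and independently of the TorchRL
+# machinery (the helper name and signature are purely illustrative):
+
+
+def td0_target_sketch(reward, next_value, done, gamma=0.99):
+    """Illustrative TD(0) target: ``r + gamma * V(s')`` on non-terminal steps."""
+    # ``done`` is a boolean tensor marking terminal transitions, where no
+    # bootstrapping should occur.
+    return reward + gamma * next_value * (~done).float()
+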
+###############################################################################
+# We also need to give some instructions to DDPG on how to build the value
+# estimator, depending on the user query. Depending on the estimator provided,
+# we will build the corresponding module to be used at train time:
+
+from torchrl.objectives.utils import default_value_kwargs
+from torchrl.objectives.value import TD0Estimator, TD1Estimator, TDLambdaEstimator
+
+
+def make_value_estimator(self, value_type: ValueEstimators, **hyperparams):
+ hp = dict(default_value_kwargs(value_type))
+ if hasattr(self, "gamma"):
+ hp["gamma"] = self.gamma
+ hp.update(hyperparams)
+ value_key = "state_action_value"
+ if value_type == ValueEstimators.TD1:
+ self._value_estimator = TD1Estimator(value_network=self.actor_critic, **hp)
+ elif value_type == ValueEstimators.TD0:
+ self._value_estimator = TD0Estimator(value_network=self.actor_critic, **hp)
+ elif value_type == ValueEstimators.GAE:
+ raise NotImplementedError(
+ f"Value type {value_type} it not implemented for loss {type(self)}."
+ )
+ elif value_type == ValueEstimators.TDLambda:
+ self._value_estimator = TDLambdaEstimator(value_network=self.actor_critic, **hp)
+ else:
+ raise NotImplementedError(f"Unknown value type {value_type}")
+ self._value_estimator.set_keys(value=value_key)
+
+
+###############################################################################
+# The ``make_value_estimator`` method can be called but is not required: if it
+# is not, the :class:`~torchrl.objectives.LossModule` will query this method with
+# its default estimator.
+#
+# The actor loss method
+# ~~~~~~~~~~~~~~~~~~~~~
+#
+# The central piece of an RL algorithm is the training loss for the actor.
+# In the case of DDPG, this function is quite simple: we just need to compute
+# the value associated with an action computed using the policy and optimize
+# the actor weights to maximize this value.
+#
+# When computing this value, we must make sure to take the value parameters out
+# of the graph, otherwise the actor and value loss will be mixed up.
+# For this, the :func:`~torchrl.objectives.utils.hold_out_params` function
+# can be used.
+
+
+def _loss_actor(
+ self,
+ tensordict,
+) -> torch.Tensor:
+ td_copy = tensordict.select(*self.actor_in_keys)
+ # Get an action from the actor network: since we made it functional, we need to pass the params
+ with self.actor_network_params.to_module(self.actor_network):
+ td_copy = self.actor_network(td_copy)
+ # get the value associated with that action
+ with self.value_network_params.detach().to_module(self.value_network):
+ td_copy = self.value_network(td_copy)
+ return -td_copy.get("state_action_value")
+
+
+###############################################################################
+# The value loss method
+# ~~~~~~~~~~~~~~~~~~~~~
+#
+# We now need to optimize our value network parameters.
+# To do this, we will rely on the value estimator of our class:
+#
+
+from torchrl.objectives.utils import distance_loss
+
+
+def _loss_value(
+ self,
+ tensordict,
+):
+ td_copy = tensordict.clone()
+
+ # V(s, a)
+ with self.value_network_params.to_module(self.value_network):
+ self.value_network(td_copy)
+ pred_val = td_copy.get("state_action_value").squeeze(-1)
+
+ # we manually reconstruct the parameters of the actor-critic, where the first
+ # set of parameters belongs to the actor and the second to the value function.
+ target_params = TensorDict(
+ {
+ "module": {
+ "0": self.target_actor_network_params,
+ "1": self.target_value_network_params,
+ }
+ },
+ batch_size=self.target_actor_network_params.batch_size,
+ device=self.target_actor_network_params.device,
+ )
+ with target_params.to_module(self.actor_critic):
+ target_value = self.value_estimator.value_estimate(tensordict).squeeze(-1)
+
+ # Computes the value loss: L2, L1 or smooth L1 depending on `self.loss_function`
+ loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_function)
+ td_error = (pred_val - target_value).pow(2)
+
+ return loss_value, td_error, pred_val, target_value
+
+
+###############################################################################
+# Putting things together in a forward call
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The only missing piece is the forward method, which will glue together the
+# value and actor loss, collect the cost values and write them in a ``TensorDict``
+# delivered to the user.
+
+from tensordict import TensorDict, TensorDictBase
+
+
+def _forward(self, input_tensordict: TensorDictBase) -> TensorDict:
+ loss_value, td_error, pred_val, target_value = self.loss_value(
+ input_tensordict,
+ )
+ td_error = td_error.detach()
+ td_error = td_error.unsqueeze(input_tensordict.ndimension())
+ if input_tensordict.device is not None:
+ td_error = td_error.to(input_tensordict.device)
+ input_tensordict.set(
+ "td_error",
+ td_error,
+ inplace=True,
+ )
+ loss_actor = self.loss_actor(input_tensordict)
+ return TensorDict(
+ source={
+ "loss_actor": loss_actor.mean(),
+ "loss_value": loss_value.mean(),
+ "pred_value": pred_val.mean().detach(),
+ "target_value": target_value.mean().detach(),
+ "pred_value_max": pred_val.max().detach(),
+ "target_value_max": target_value.max().detach(),
+ },
+ batch_size=[],
+ )
+
+
+from torchrl.objectives import LossModule
+
+
+class DDPGLoss(LossModule):
+ default_value_estimator = default_value_estimator
+ make_value_estimator = make_value_estimator
+
+ __init__ = _init
+ forward = _forward
+ loss_value = _loss_value
+ loss_actor = _loss_actor
+
+
+###############################################################################
+# Now that we have our loss, we can use it to train a policy to solve a
+# control task.
+#
+# Environment
+# -----------
+#
+# In most algorithms, the first thing that needs to be taken care of is the
+# construction of the environment as it conditions the remainder of the
+# training script.
+#
+# For this example, we will be using the ``"cheetah"`` task. The goal is to make
+# a half-cheetah run as fast as possible.
+#
+# In TorchRL, one can create such a task by relying on ``dm_control`` or ``gym``:
+#
+# .. code-block:: python
+#
+# env = GymEnv("HalfCheetah-v4")
+#
+# or
+#
+# .. code-block:: python
+#
+# env = DMControlEnv("cheetah", "run")
+#
+# By default, these environments disable rendering. Training from states is
+# usually easier than training from images. To keep things simple, we focus
+# on learning from states only. To pass the pixels to the ``tensordicts`` that
+# are collected by :func:`env.step()`, simply pass the ``from_pixels=True``
+# argument to the constructor:
+#
+# .. code-block:: python
+#
+# env = GymEnv("HalfCheetah-v4", from_pixels=True, pixels_only=True)
+#
+# We write a :func:`make_env` helper function that will create an environment
+# with either one of the two backends considered above (``dm-control`` or ``gym``).
+#
+
+from torchrl.envs.libs.dm_control import DMControlEnv
+from torchrl.envs.libs.gym import GymEnv
+
+env_library = None
+env_name = None
+
+
+def make_env(from_pixels=False):
+ """Create a base ``env``."""
+ global env_library
+ global env_name
+
+ if backend == "dm_control":
+ env_name = "cheetah"
+ env_task = "run"
+ env_args = (env_name, env_task)
+ env_library = DMControlEnv
+ elif backend == "gym":
+ env_name = "HalfCheetah-v4"
+ env_args = (env_name,)
+ env_library = GymEnv
+ else:
+ raise NotImplementedError
+
+ env_kwargs = {
+ "device": device,
+ "from_pixels": from_pixels,
+ "pixels_only": from_pixels,
+ "frame_skip": 2,
+ }
+ env = env_library(*env_args, **env_kwargs)
+ return env
+
+
+###############################################################################
+# Transforms
+# ~~~~~~~~~~
+#
+# Now that we have a base environment, we may want to modify its representation
+# to make it more policy-friendly. In TorchRL, transforms are appended to the
+# base environment in a specialized :class:`torchrl.envs.TransformedEnv` class.
+#
+# - It is common in DDPG to rescale the reward using some heuristic value. We
+# will multiply the reward by 5 in this example.
+#
+# - If we are using :mod:`dm_control`, it is also important to build an interface
+# between the simulator which works with double precision numbers, and our
+# script which presumably uses single precision ones. This transformation goes
+# both ways: when calling :func:`env.step`, our actions will need to be
+# represented in double precision, and the output will need to be transformed
+# to single precision.
+# The :class:`~torchrl.envs.DoubleToFloat` transform does exactly this: the
+# ``in_keys`` list refers to the keys that will need to be transformed from
+# double to float, while the ``in_keys_inv`` refers to those that need to
+# be transformed to double before being passed to the environment.
+#
+# - We concatenate the state keys together using the :class:`~torchrl.envs.CatTensors`
+# transform.
+#
+# - Finally, we also leave the possibility of normalizing the states: we will
+# take care of computing the normalizing constants later on.
+#
+
+from torchrl.envs import (
+ CatTensors,
+ DoubleToFloat,
+ EnvCreator,
+ InitTracker,
+ ObservationNorm,
+ ParallelEnv,
+ RewardScaling,
+ StepCounter,
+ TransformedEnv,
+)
+
+
+def make_transformed_env(
+ env,
+):
+ """Apply transforms to the ``env`` (such as reward scaling and state normalization)."""
+
+ env = TransformedEnv(env)
+
+ # we append transforms one by one, although we might as well create the
+ # transformed environment using the `env = TransformedEnv(base_env, transforms)`
+ # syntax.
+ env.append_transform(RewardScaling(loc=0.0, scale=reward_scaling))
+
+ # We concatenate all states into a single "observation_vector"
+ # even if there is a single tensor, it'll be renamed to "observation_vector".
+ # This facilitates the downstream operations as we know the name of the
+ # output tensor.
+ # In some environments (not half-cheetah), there may be more than one
+ # observation vector: in this case this code snippet will concatenate them
+ # all.
+ selected_keys = list(env.observation_spec.keys())
+ out_key = "observation_vector"
+ env.append_transform(CatTensors(in_keys=selected_keys, out_key=out_key))
+
+ # we normalize the states, but for now let's just instantiate a stateless
+ # version of the transform
+ env.append_transform(ObservationNorm(in_keys=[out_key], standard_normal=True))
+
+ env.append_transform(DoubleToFloat())
+
+ env.append_transform(StepCounter(max_frames_per_traj))
+
+ # We need a marker for the start of trajectories for our Ornstein-Uhlenbeck (OU)
+ # exploration:
+ env.append_transform(InitTracker())
+
+ return env
+
+
+###############################################################################
+# Parallel execution
+# ~~~~~~~~~~~~~~~~~~
+#
+# The following helper function allows us to run environments in parallel.
+# Running environments in parallel can significantly speed up the collection
+# throughput. When using transformed environments, we need to choose whether we
+# want to execute the transform individually for each environment, or
+# centralize the data and transform it in batch. Both approaches are easy to
+# code:
+#
+# .. code-block:: python
+#
+# env = ParallelEnv(
+# lambda: TransformedEnv(GymEnv("HalfCheetah-v4"), transforms),
+# num_workers=4
+# )
+# env = TransformedEnv(
+# ParallelEnv(lambda: GymEnv("HalfCheetah-v4"), num_workers=4),
+# transforms
+# )
+#
+# To leverage the vectorization capabilities of PyTorch, we adopt
+# the second method and transform the batched data centrally:
+#
+
+
+def parallel_env_constructor(
+ env_per_collector,
+ transform_state_dict,
+):
+ if env_per_collector == 1:
+
+ def make_t_env():
+ env = make_transformed_env(make_env())
+ env.transform[2].init_stats(3)
+ env.transform[2].loc.copy_(transform_state_dict["loc"])
+ env.transform[2].scale.copy_(transform_state_dict["scale"])
+ return env
+
+ env_creator = EnvCreator(make_t_env)
+ return env_creator
+
+ parallel_env = ParallelEnv(
+ num_workers=env_per_collector,
+ create_env_fn=EnvCreator(lambda: make_env()),
+ create_env_kwargs=None,
+ pin_memory=False,
+ )
+ env = make_transformed_env(parallel_env)
+ # we call `init_stats` for a limited number of steps, just to instantiate
+ # the lazy buffers.
+ env.transform[2].init_stats(3, cat_dim=1, reduce_dim=[0, 1])
+ env.transform[2].load_state_dict(transform_state_dict)
+ return env
+
+
+# The backend can be ``gym`` or ``dm_control``
+backend = "gym"
+
+###############################################################################
+# .. note::
+#
+# ``frame_skip`` batches multiple steps together under a single action.
+# If > 1, the other frame counts (for example, frames_per_batch, total_frames)
+# need to be adjusted to keep the total number of frames collected consistent
+# across experiments. This is important because raising the frame-skip while keeping
+# the total number of frames unchanged may seem like cheating: all things considered,
+# a dataset of 10M elements collected with a frame-skip of 2 and another collected
+# with a frame-skip of 1 actually have a 2:1 ratio of interactions with the
+# environment (20M simulator steps versus 10M)! In a nutshell, one should be cautious
+# about the frame counts of a training script when dealing with frame skipping,
+# as this may lead to biased comparisons between training strategies.
+#
+# Scaling the reward helps us control the signal magnitude for more
+# efficient learning.
+reward_scaling = 5.0
+
+###############################################################################
+# We also define when a trajectory will be truncated. A thousand steps (500 if
+# frame-skip = 2) is a good number to use for the cheetah task:
+
+max_frames_per_traj = 500
+
+###############################################################################
+# Normalization of the observations
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# To compute the normalizing statistics, we run an arbitrary number of random
+# steps in the environment and compute the mean and standard deviation of the
+# collected observations. The :func:`ObservationNorm.init_stats()` method can
+# be used for this purpose. To get the summary statistics, we create a dummy
+# environment, run it for a given number of random steps, and compute the
+# summary statistics of the data collected along the way.
+#
+
+
+def get_env_stats():
+ """Gets the stats of an environment."""
+ proof_env = make_transformed_env(make_env())
+ t = proof_env.transform[2]
+ t.init_stats(init_env_steps)
+ transform_state_dict = t.state_dict()
+ proof_env.close()
+ return transform_state_dict
+
+
+###############################################################################
+# Normalization stats
+# ~~~~~~~~~~~~~~~~~~~
+# Number of random steps used for stats computation with ``ObservationNorm``
+
+init_env_steps = 5000
+
+transform_state_dict = get_env_stats()
+
+###############################################################################
+# Number of environments in each data collector
+env_per_collector = 4
+
+###############################################################################
+# We pass the stats computed earlier to normalize the output of our
+# environment:
+
+parallel_env = parallel_env_constructor(
+ env_per_collector=env_per_collector,
+ transform_state_dict=transform_state_dict,
+)
+
+
+from torchrl.data import CompositeSpec
+
+###############################################################################
+# Building the model
+# ------------------
+#
+# We now turn to the setup of the model. As we have seen, DDPG requires a
+# value network, trained to estimate the value of a state-action pair, and a
+# parametric actor that learns how to select actions that maximize this value.
+#
+# Recall that building a TorchRL module requires two steps:
+#
+# - writing the :class:`torch.nn.Module` that will be used as network,
+# - wrapping the network in a :class:`tensordict.nn.TensorDictModule` where the
+# data flow is handled by specifying the input and output keys.
+#
+# In more complex scenarios, :class:`tensordict.nn.TensorDictSequential` can
+# also be used.
+#
+#
+# The Q-Value network is wrapped in a :class:`~torchrl.modules.ValueOperator`
+# that automatically sets the ``out_keys`` to ``"state_action_value"`` for Q-value
+# networks and to ``"state_value"`` for other value networks.
+#
+# TorchRL provides a built-in version of the DDPG networks as presented in the
+# original paper. These can be found under :class:`~torchrl.modules.DdpgMlpActor`
+# and :class:`~torchrl.modules.DdpgMlpQNet`.
+#
+# Since we use lazy modules, it is necessary to materialize the lazy modules
+# before being able to move the policy from device to device and achieve other
+# operations. Hence, it is good practice to run the modules with a small
+# sample of data. For this purpose, we generate fake data from the
+# environment specs.
+#
+
+from torchrl.modules import (
+ ActorCriticWrapper,
+ DdpgMlpActor,
+ DdpgMlpQNet,
+ OrnsteinUhlenbeckProcessModule,
+ ProbabilisticActor,
+ TanhDelta,
+ ValueOperator,
+)
+
+
+def make_ddpg_actor(
+ transform_state_dict,
+ device="cpu",
+):
+ proof_environment = make_transformed_env(make_env())
+ proof_environment.transform[2].init_stats(3)
+ proof_environment.transform[2].load_state_dict(transform_state_dict)
+
+ out_features = proof_environment.action_spec.shape[-1]
+
+ actor_net = DdpgMlpActor(
+ action_dim=out_features,
+ )
+
+ in_keys = ["observation_vector"]
+ out_keys = ["param"]
+
+ actor = TensorDictModule(
+ actor_net,
+ in_keys=in_keys,
+ out_keys=out_keys,
+ )
+
+ actor = ProbabilisticActor(
+ actor,
+ distribution_class=TanhDelta,
+ in_keys=["param"],
+ spec=CompositeSpec(action=proof_environment.action_spec),
+ ).to(device)
+
+ q_net = DdpgMlpQNet()
+
+ in_keys = in_keys + ["action"]
+ qnet = ValueOperator(
+ in_keys=in_keys,
+ module=q_net,
+ ).to(device)
+
+ # initialize lazy modules
+ qnet(actor(proof_environment.reset().to(device)))
+ return actor, qnet
+
+
+actor, qnet = make_ddpg_actor(
+ transform_state_dict=transform_state_dict,
+ device=device,
+)
+
+###############################################################################
+# Exploration
+# ~~~~~~~~~~~
+#
+# The policy is passed into a :class:`~torchrl.modules.OrnsteinUhlenbeckProcessModule`
+# exploration module, as suggested in the original paper.
+# Let's define the number of frames before OU noise reaches its minimum value
+annealing_frames = 1_000_000
+
+actor_model_explore = TensorDictSequential(
+ actor,
+ OrnsteinUhlenbeckProcessModule(
+ spec=actor.spec.clone(),
+ annealing_num_steps=annealing_frames,
+ ).to(device),
+)
+if device == torch.device("cpu"):
+ actor_model_explore.share_memory()
+
+
+###############################################################################
+# Data collector
+# --------------
+#
+# TorchRL provides specialized classes to help you collect data by executing
+# the policy in the environment. These "data collectors" iteratively compute
+# the action to be executed at a given time, then execute a step in the
+# environment and reset it when required.
+# Data collectors are designed to help developers have a tight control
+# on the number of frames per batch of data, on the (a)sync nature of this
+# collection and on the resources allocated to the data collection (for example
+# GPU, number of workers, and so on).
+#
+# Here we will use
+# :class:`~torchrl.collectors.SyncDataCollector`, a simple, single-process
+# data collector. TorchRL offers other collectors, such as
+# :class:`~torchrl.collectors.MultiaSyncDataCollector`, which executes the
+# rollouts in an asynchronous manner (for example, data will be collected while
+# the policy is being optimized, thereby decoupling the training and
+# data collection).
+#
+# The parameters to specify are:
+#
+# - an environment factory or an environment,
+# - the policy,
+# - the total number of frames before the collector is considered empty,
+# - the maximum number of frames per trajectory (useful for non-terminating
+# environments, like ``dm_control`` ones).
+#
+# .. note::
+#
+# The ``max_frames_per_traj`` passed to the collector will have the effect
+# of registering a new :class:`~torchrl.envs.StepCounter` transform
+# with the environment used for inference. We can achieve the same result
+# manually, as we do in this script.
+#
+# One should also pass:
+#
+# - the number of frames in each batch collected,
+# - the number of random steps executed independently from the policy,
+# - the devices used for policy execution
+# - the devices used to store data before the data is passed to the main
+# process.
+#
+# The total frames we will use during training should be around 1M.
+total_frames = 10_000 # 1_000_000
+
+###############################################################################
+# The number of frames returned by the collector at each iteration of the outer
+# loop is equal to the length of each sub-trajectories times the number of
+# environments run in parallel in each collector.
+#
+# In other words, we expect batches from the collector to have a shape
+# ``[env_per_collector, traj_len]`` where
+# ``traj_len=frames_per_batch/env_per_collector``:
+#
+traj_len = 200
+frames_per_batch = env_per_collector * traj_len
+init_random_frames = 5000
+num_collectors = 2
+
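+###############################################################################
+# With the values above, ``frames_per_batch = 4 * 200 = 800``: each batch
+# yielded by the collector can be viewed as a ``[4, 200]`` stack of
+# trajectories (4 environments, 200 consecutive steps each).
+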
+from torchrl.collectors import SyncDataCollector
+from torchrl.envs import ExplorationType
+
+collector = SyncDataCollector(
+ parallel_env,
+ policy=actor_model_explore,
+ total_frames=total_frames,
+ frames_per_batch=frames_per_batch,
+ init_random_frames=init_random_frames,
+ reset_at_each_iter=False,
+ split_trajs=False,
+ device=collector_device,
+ exploration_type=ExplorationType.RANDOM,
+)
+
+###############################################################################
+# Evaluator: building your recorder object
+# ----------------------------------------
+#
+# As the training data is obtained using some exploration strategy, the true
+# performance of our algorithm needs to be assessed in deterministic mode. We
+# do this using a dedicated class, ``Recorder``, which executes the policy in
+# the environment at a given frequency and returns some statistics obtained
+# from these simulations.
+#
+# The following helper function builds this object:
+from torchrl.trainers import Recorder
+
+
+def make_recorder(actor_model_explore, transform_state_dict, record_interval):
+ base_env = make_env()
+ environment = make_transformed_env(base_env)
+ environment.transform[2].init_stats(
+ 3
+ ) # must be instantiated to load the state dict
+ environment.transform[2].load_state_dict(transform_state_dict)
+
+ recorder_obj = Recorder(
+ record_frames=1000,
+ policy_exploration=actor_model_explore,
+ environment=environment,
+ exploration_type=ExplorationType.DETERMINISTIC,
+ record_interval=record_interval,
+ )
+ return recorder_obj
+
+
+###############################################################################
+# We will be recording the performance every 10 batches collected
+record_interval = 10
+
+recorder = make_recorder(
+ actor_model_explore, transform_state_dict, record_interval=record_interval
+)
+
+from torchrl.data.replay_buffers import (
+ LazyMemmapStorage,
+ PrioritizedSampler,
+ RandomSampler,
+ TensorDictReplayBuffer,
+)
+
+###############################################################################
+# Replay buffer
+# -------------
+#
+# Replay buffers come in two flavors: prioritized (where some error signal
+# is used to give a higher likelihood of sampling to some items than others)
+# and regular, circular experience replay.
+#
+# TorchRL replay buffers are composable: one can pick up the storage, sampling
+# and writing strategies. It is also possible to
+# store tensors on physical memory using a memory-mapped array. The following
+# function takes care of creating the replay buffer with the desired
+# hyperparameters:
+#
+
+from torchrl.envs import RandomCropTensorDict
+
+
+def make_replay_buffer(buffer_size, batch_size, random_crop_len, prefetch=3, prb=False):
+ if prb:
+ sampler = PrioritizedSampler(
+ max_capacity=buffer_size,
+ alpha=0.7,
+ beta=0.5,
+ )
+ else:
+ sampler = RandomSampler()
+ replay_buffer = TensorDictReplayBuffer(
+ storage=LazyMemmapStorage(
+ buffer_size,
+ scratch_dir=buffer_scratch_dir,
+ ),
+ batch_size=batch_size,
+ sampler=sampler,
+ pin_memory=False,
+ prefetch=prefetch,
+ transform=RandomCropTensorDict(random_crop_len, sample_dim=1),
+ )
+ return replay_buffer
+
+
+###############################################################################
+# We'll store the replay buffer in a temporary directory on disk
+
+import tempfile
+
+tmpdir = tempfile.TemporaryDirectory()
+buffer_scratch_dir = tmpdir.name
+
+###############################################################################
+# Replay buffer storage and batch size
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# TorchRL replay buffers count the number of elements along the first dimension.
+# Since we'll be feeding trajectories to our buffer, we need to adapt the buffer
+# size by dividing it by the length of the sub-trajectories yielded by our
+# data collector.
+# Regarding the batch-size, our sampling strategy will consist of sampling
+# trajectories of length ``traj_len=200`` before selecting sub-trajectories
+# of length ``random_crop_len=25`` on which the loss will be computed.
+# This strategy balances the choice of storing whole trajectories of a certain
+# length with the need to provide samples with sufficient heterogeneity
+# to our loss. The following figure shows the dataflow from a collector
+# that gets 8 frames in each batch with 2 environments run in parallel,
+# feeds them to a replay buffer that contains 1000 trajectories and
+# samples sub-trajectories of 2 time steps each.
+#
+# .. figure:: /_static/img/replaybuffer_traj.png
+# :alt: Storing trajectories in the replay buffer
+#
+# Let's start with the number of frames stored in the buffer
+
+
+def ceil_div(x, y):
+ return -(-x // y)  # ceiling division: smallest integer >= x / y
+
+
+buffer_size = 1_000_000
+buffer_size = ceil_div(buffer_size, traj_len)
+
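+###############################################################################
+# With ``traj_len = 200``, this amounts to ``ceil(1_000_000 / 200) = 5_000``
+# stored trajectories, each of which counts as a single element along the
+# buffer's first dimension.
+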
+###############################################################################
+# Prioritized replay buffer is disabled by default
+prb = False
+
+###############################################################################
+# We also need to define how many updates we'll be doing per batch of data
+# collected. This is known as the update-to-data or ``UTD`` ratio:
+update_to_data = 64
+
+###############################################################################
+# We'll be feeding the loss with trajectories of length 25:
+random_crop_len = 25
+
+###############################################################################
+# In the original paper, the authors perform one update with a batch of 64
+# elements for each frame collected. Here, we reproduce the same ratio
+# but perform several updates at each batch collection. We
+# adapt our batch-size to achieve the same update-per-frame ratio:
+
+batch_size = ceil_div(64 * frames_per_batch, update_to_data * random_crop_len)
+
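+###############################################################################
+# As a quick sanity check of that ratio with the default values above
+# (``frames_per_batch = 800``): ``batch_size = ceil(64 * 800 / (64 * 25)) = 32``.
+# Each collected batch then triggers 64 updates over 32 sub-trajectories of
+# 25 steps, that is ``64 * 32 * 25 = 51_200`` sampled transitions for 800
+# newly collected frames, i.e. the same 64 updates-per-frame budget as in the
+# original paper.
+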
+replay_buffer = make_replay_buffer(
+ buffer_size=buffer_size,
+ batch_size=batch_size,
+ random_crop_len=random_crop_len,
+ prefetch=3,
+ prb=prb,
+)
+
+###############################################################################
+# Loss module construction
+# ------------------------
+#
+# We build our loss module with the actor and ``qnet`` we've just created.
+# Because we have target parameters to update, we _must_ create a target network
+# updater.
+#
+
+gamma = 0.99
+lmbda = 0.9
+tau = 0.001 # Decay factor for the target network
+
+loss_module = DDPGLoss(actor, qnet)
+
+###############################################################################
+# let's use the TD(lambda) estimator!
+loss_module.make_value_estimator(ValueEstimators.TDLambda, gamma=gamma, lmbda=lmbda, device=device)
+
+###############################################################################
+# .. note::
+# Off-policy usually dictates a TD(0) estimator. Here, we use a TD(:math:`\lambda`)
+# estimator, which will introduce some bias as the trajectory that follows
+# a certain state has been collected with an outdated policy.
+# This trick, like the multi-step trick that can be used during data collection,
+# is one of those "hacks" that we usually find to work well in practice
+# despite the fact that they introduce some bias in the return estimates.
+#
+# Target network updater
+# ~~~~~~~~~~~~~~~~~~~~~~
+#
+# Target networks are a crucial part of off-policy RL algorithms.
+# Updating the target network parameters is made easy thanks to the
+# :class:`~torchrl.objectives.HardUpdate` and :class:`~torchrl.objectives.SoftUpdate`
+# classes. They're built with the loss module as argument, and the update is
+# achieved via a call to `updater.step()` at the appropriate location in the
+# training loop.
+
+from torchrl.objectives.utils import SoftUpdate
+
+target_net_updater = SoftUpdate(loss_module, eps=1 - tau)
+
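+###############################################################################
+# For reference, a soft ("Polyak") update follows the generic rule
+# ``theta_target <- (1 - tau) * theta_target + tau * theta``. The helper below
+# is an illustrative sketch of that rule on plain parameter sequences; in this
+# tutorial, the ``target_net_updater.step()`` call takes care of it for us.
+
+
+@torch.no_grad()
+def polyak_update_sketch(target_params, params, tau=0.001):
+    """Illustrative in-place soft update of ``target_params`` towards ``params``."""
+    for p_target, p in zip(target_params, params):
+        p_target.lerp_(p, tau)
+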
+###############################################################################
+# Optimizer
+# ~~~~~~~~~
+#
+# Finally, we will use the Adam optimizer for the policy and value network:
+
+from torch import optim
+
+optimizer_actor = optim.Adam(
+ loss_module.actor_network_params.values(True, True), lr=1e-4, weight_decay=0.0
+)
+optimizer_value = optim.Adam(
+ loss_module.value_network_params.values(True, True), lr=1e-3, weight_decay=1e-2
+)
+total_collection_steps = total_frames // frames_per_batch
+
+###############################################################################
+# Time to train the policy
+# ------------------------
+#
+# The training loop is pretty straightforward now that we have built all the
+# modules we need.
+#
+
+rewards = []
+rewards_eval = []
+
+# Main loop
+
+collected_frames = 0
+pbar = tqdm.tqdm(total=total_frames)
+r0 = None
+for i, tensordict in enumerate(collector):
+
+ # update weights of the inference policy
+ collector.update_policy_weights_()
+
+ if r0 is None:
+ r0 = tensordict["next", "reward"].mean().item()
+ pbar.update(tensordict.numel())
+
+ # extend the replay buffer with the new data
+ current_frames = tensordict.numel()
+ collected_frames += current_frames
+ replay_buffer.extend(tensordict.cpu())
+
+ # optimization steps
+ if collected_frames >= init_random_frames:
+ for _ in range(update_to_data):
+ # sample from replay buffer
+ sampled_tensordict = replay_buffer.sample().to(device)
+
+ # Compute loss
+ loss_dict = loss_module(sampled_tensordict)
+
+ # optimize
+ loss_dict["loss_actor"].backward()
+ gn1 = torch.nn.utils.clip_grad_norm_(
+ loss_module.actor_network_params.values(True, True), 10.0
+ )
+ optimizer_actor.step()
+ optimizer_actor.zero_grad()
+
+ loss_dict["loss_value"].backward()
+ gn2 = torch.nn.utils.clip_grad_norm_(
+ loss_module.value_network_params.values(True, True), 10.0
+ )
+ optimizer_value.step()
+ optimizer_value.zero_grad()
+
+ gn = (gn1**2 + gn2**2) ** 0.5
+
+ # update priority
+ if prb:
+ replay_buffer.update_tensordict_priority(sampled_tensordict)
+ # update target network
+ target_net_updater.step()
+
+ rewards.append(
+ (
+ i,
+ tensordict["next", "reward"].mean().item(),
+ )
+ )
+ td_record = recorder(None)
+ if td_record is not None:
+ rewards_eval.append((i, td_record["r_evaluation"].item()))
+ if len(rewards_eval) and collected_frames >= init_random_frames:
+ target_value = loss_dict["target_value"].item()
+ loss_value = loss_dict["loss_value"].item()
+ loss_actor = loss_dict["loss_actor"].item()
+ rn = sampled_tensordict["next", "reward"].mean().item()
+ rs = sampled_tensordict["next", "reward"].std().item()
+ pbar.set_description(
+ f"reward: {rewards[-1][1]: 4.2f} (r0 = {r0: 4.2f}), "
+ f"reward eval: reward: {rewards_eval[-1][1]: 4.2f}, "
+ f"reward normalized={rn :4.2f}/{rs :4.2f}, "
+ f"grad norm={gn: 4.2f}, "
+ f"loss_value={loss_value: 4.2f}, "
+ f"loss_actor={loss_actor: 4.2f}, "
+ f"target value: {target_value: 4.2f}"
+ )
+
+ # update the exploration strategy
+ actor_model_explore[1].step(current_frames)
+
+collector.shutdown()
+del collector
+
+###############################################################################
+# Experiment results
+# ------------------
+#
+# We make a simple plot of the average rewards during training. We can observe
+# that our policy learned quite well to solve the task.
+#
+# .. note::
+# As already mentioned above, to get more reasonable performance,
+# use a greater value for ``total_frames``, for example, 1M.
+
+from matplotlib import pyplot as plt
+
+plt.figure()
+plt.plot(*zip(*rewards), label="training")
+plt.plot(*zip(*rewards_eval), label="eval")
+plt.legend()
+plt.xlabel("iter")
+plt.ylabel("reward")
+plt.tight_layout()
+
+###############################################################################
+# Conclusion
+# ----------
+#
+# In this tutorial, we have learned how to code a loss module in TorchRL given
+# the concrete example of DDPG.
+#
+# The key takeaways are:
+#
+# - How to use the :class:`~torchrl.objectives.LossModule` class to code up a new
+# loss component;
+# - How to use (or not) a target network, and how to update its parameters;
+# - How to create an optimizer associated with a loss module.
+#
+# Next Steps
+# ----------
+#
+# To iterate further on this loss module, we might consider:
+#
+# - Using `@dispatch` (see `[Feature] Dispatch IQL loss module `_.)
+# - Allowing flexible TensorDict keys.
+#
diff --git a/advanced_source/cpp_autograd.rst b/advanced_source/cpp_autograd.rst
new file mode 100644
index 00000000000..51e5e0b358f
--- /dev/null
+++ b/advanced_source/cpp_autograd.rst
@@ -0,0 +1,437 @@
+Autograd in C++ Frontend
+========================
+
+The ``autograd`` package is crucial for building highly flexible and dynamic neural
+networks in PyTorch. Most of the autograd APIs in the PyTorch Python frontend are also available
+in the C++ frontend, allowing easy translation of autograd code from Python to C++.
+
+In this tutorial, we explore several examples of doing autograd in the PyTorch C++ frontend.
+Note that this tutorial assumes that you already have a basic understanding of
+autograd in the Python frontend. If that's not the case, please first read
+`Autograd: Automatic Differentiation `_.
+
+Basic autograd operations
+-------------------------
+
+(Adapted from `this tutorial `_)
+
+Create a tensor and set ``torch::requires_grad()`` to track computation with it
+
+.. code-block:: cpp
+
+ auto x = torch::ones({2, 2}, torch::requires_grad());
+ std::cout << x << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ 1 1
+ 1 1
+ [ CPUFloatType{2,2} ]
+
+
+Do a tensor operation:
+
+.. code-block:: cpp
+
+ auto y = x + 2;
+ std::cout << y << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ 3 3
+ 3 3
+ [ CPUFloatType{2,2} ]
+
+``y`` was created as a result of an operation, so it has a ``grad_fn``.
+
+.. code-block:: cpp
+
+ std::cout << y.grad_fn()->name() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ AddBackward1
+
+Do more operations on ``y``
+
+.. code-block:: cpp
+
+ auto z = y * y * 3;
+ auto out = z.mean();
+
+ std::cout << z << std::endl;
+ std::cout << z.grad_fn()->name() << std::endl;
+ std::cout << out << std::endl;
+ std::cout << out.grad_fn()->name() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ 27 27
+ 27 27
+ [ CPUFloatType{2,2} ]
+ MulBackward1
+ 27
+ [ CPUFloatType{} ]
+ MeanBackward0
+
+
+``.requires_grad_( ... )`` changes an existing tensor's ``requires_grad`` flag in-place.
+
+.. code-block:: cpp
+
+ auto a = torch::randn({2, 2});
+ a = ((a * 3) / (a - 1));
+ std::cout << a.requires_grad() << std::endl;
+
+ a.requires_grad_(true);
+ std::cout << a.requires_grad() << std::endl;
+
+ auto b = (a * a).sum();
+ std::cout << b.grad_fn()->name() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ false
+ true
+ SumBackward0
+
+Let's backprop now. Because ``out`` contains a single scalar, ``out.backward()``
+is equivalent to ``out.backward(torch::tensor(1.))``.
+
+.. code-block:: cpp
+
+ out.backward();
+
+Print gradients d(out)/dx
+
+.. code-block:: cpp
+
+ std::cout << x.grad() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ 4.5000 4.5000
+ 4.5000 4.5000
+ [ CPUFloatType{2,2} ]
+
+You should have got a matrix of ``4.5``. For explanations on how we arrive at this value,
+please see `the corresponding section in this tutorial `_.
+
+Now let's take a look at an example of vector-Jacobian product:
+
+.. code-block:: cpp
+
+ x = torch::randn(3, torch::requires_grad());
+
+ y = x * 2;
+ while (y.norm().item() < 1000) {
+ y = y * 2;
+ }
+
+ std::cout << y << std::endl;
+ std::cout << y.grad_fn()->name() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ -1021.4020
+ 314.6695
+ -613.4944
+ [ CPUFloatType{3} ]
+ MulBackward1
+
+If we want the vector-Jacobian product, pass the vector to ``backward`` as argument:
+
+.. code-block:: cpp
+
+ auto v = torch::tensor({0.1, 1.0, 0.0001}, torch::kFloat);
+ y.backward(v);
+
+ std::cout << x.grad() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ 102.4000
+ 1024.0000
+ 0.1024
+ [ CPUFloatType{3} ]
+
+You can also stop autograd from tracking history on tensors that require gradients
+either by putting ``torch::NoGradGuard`` in a code block
+
+.. code-block:: cpp
+
+ std::cout << x.requires_grad() << std::endl;
+ std::cout << x.pow(2).requires_grad() << std::endl;
+
+ {
+ torch::NoGradGuard no_grad;
+ std::cout << x.pow(2).requires_grad() << std::endl;
+ }
+
+
+Out:
+
+.. code-block:: shell
+
+ true
+ true
+ false
+
+Or by using ``.detach()`` to get a new tensor with the same content but that does
+not require gradients:
+
+.. code-block:: cpp
+
+ std::cout << x.requires_grad() << std::endl;
+ y = x.detach();
+ std::cout << y.requires_grad() << std::endl;
+ std::cout << x.eq(y).all().item() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ true
+ false
+ true
+
+For more information on C++ tensor autograd APIs such as ``grad`` / ``requires_grad`` /
+``is_leaf`` / ``backward`` / ``detach`` / ``detach_`` / ``register_hook`` / ``retain_grad``,
+please see `the corresponding C++ API docs `_.
+
+Computing higher-order gradients in C++
+---------------------------------------
+
+One of the applications of higher-order gradients is calculating gradient penalty.
+Let's see an example of it using ``torch::autograd::grad``:
+
+.. code-block:: cpp
+
+ #include <torch/torch.h>
+
+ auto model = torch::nn::Linear(4, 3);
+
+ auto input = torch::randn({3, 4}).requires_grad_(true);
+ auto output = model(input);
+
+ // Calculate loss
+ auto target = torch::randn({3, 3});
+ auto loss = torch::nn::MSELoss()(output, target);
+
+ // Use norm of gradients as penalty
+ auto grad_output = torch::ones_like(output);
+ auto gradient = torch::autograd::grad({output}, {input}, /*grad_outputs=*/{grad_output}, /*create_graph=*/true)[0];
+ auto gradient_penalty = torch::pow((gradient.norm(2, /*dim=*/1) - 1), 2).mean();
+
+ // Add gradient penalty to loss
+ auto combined_loss = loss + gradient_penalty;
+ combined_loss.backward();
+
+ std::cout << input.grad() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ -0.1042 -0.0638 0.0103 0.0723
+ -0.2543 -0.1222 0.0071 0.0814
+ -0.1683 -0.1052 0.0355 0.1024
+ [ CPUFloatType{3,4} ]
+
+Please see the documentation for ``torch::autograd::backward``
+(`link `_)
+and ``torch::autograd::grad``
+(`link `_)
+for more information on how to use them.
+
+Using custom autograd function in C++
+-------------------------------------
+
+(Adapted from `this tutorial `_)
+
+Adding a new elementary operation to ``torch::autograd`` requires implementing a new ``torch::autograd::Function``
+subclass for each operation. ``torch::autograd::Function`` s are what ``torch::autograd``
+uses to compute the results and gradients, and encode the operation history. Every
+new function requires you to implement 2 methods: ``forward`` and ``backward``, and
+please see `this link `_
+for the detailed requirements.
+
+Below you can find code for a ``Linear`` function from ``torch::nn``:
+
+.. code-block:: cpp
+
+ #include <torch/torch.h>
+
+ using namespace torch::autograd;
+
+ // Inherit from Function
+ class LinearFunction : public Function<LinearFunction> {
+ public:
+ // Note that both forward and backward are static functions
+
+ // bias is an optional argument
+ static torch::Tensor forward(
+ AutogradContext *ctx, torch::Tensor input, torch::Tensor weight, torch::Tensor bias = torch::Tensor()) {
+ ctx->save_for_backward({input, weight, bias});
+ auto output = input.mm(weight.t());
+ if (bias.defined()) {
+ output += bias.unsqueeze(0).expand_as(output);
+ }
+ return output;
+ }
+
+ static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
+ auto saved = ctx->get_saved_variables();
+ auto input = saved[0];
+ auto weight = saved[1];
+ auto bias = saved[2];
+
+ auto grad_output = grad_outputs[0];
+ auto grad_input = grad_output.mm(weight);
+ auto grad_weight = grad_output.t().mm(input);
+ auto grad_bias = torch::Tensor();
+ if (bias.defined()) {
+ grad_bias = grad_output.sum(0);
+ }
+
+ return {grad_input, grad_weight, grad_bias};
+ }
+ };
+
+Then, we can use the ``LinearFunction`` in the following way:
+
+.. code-block:: cpp
+
+ auto x = torch::randn({2, 3}).requires_grad_();
+ auto weight = torch::randn({4, 3}).requires_grad_();
+ auto y = LinearFunction::apply(x, weight);
+ y.sum().backward();
+
+ std::cout << x.grad() << std::endl;
+ std::cout << weight.grad() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ 0.5314 1.2807 1.4864
+ 0.5314 1.2807 1.4864
+ [ CPUFloatType{2,3} ]
+ 3.7608 0.9101 0.0073
+ 3.7608 0.9101 0.0073
+ 3.7608 0.9101 0.0073
+ 3.7608 0.9101 0.0073
+ [ CPUFloatType{4,3} ]
+
+Here, we give an additional example of a function that is parametrized by non-tensor arguments:
+
+.. code-block:: cpp
+
+ #include <torch/torch.h>
+
+ using namespace torch::autograd;
+
+ class MulConstant : public Function<MulConstant> {
+ public:
+ static torch::Tensor forward(AutogradContext *ctx, torch::Tensor tensor, double constant) {
+ // ctx is a context object that can be used to stash information
+ // for backward computation
+ ctx->saved_data["constant"] = constant;
+ return tensor * constant;
+ }
+
+ static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
+ // We return as many input gradients as there were arguments.
+ // Gradients of non-tensor arguments to forward must be `torch::Tensor()`.
+ return {grad_outputs[0] * ctx->saved_data["constant"].toDouble(), torch::Tensor()};
+ }
+ };
+
+Then, we can use the ``MulConstant`` in the following way:
+
+.. code-block:: cpp
+
+ auto x = torch::randn({2}).requires_grad_();
+ auto y = MulConstant::apply(x, 5.5);
+ y.sum().backward();
+
+ std::cout << x.grad() << std::endl;
+
+Out:
+
+.. code-block:: shell
+
+ 5.5000
+ 5.5000
+ [ CPUFloatType{2} ]
+
+For more information on ``torch::autograd::Function``, please see
+`its documentation `_.
+
+Translating autograd code from Python to C++
+--------------------------------------------
+
+On a high level, the easiest way to use autograd in C++ is to have working
+autograd code in Python first, and then translate your autograd code from Python to
+C++ using the following table:
+
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Python | C++ |
++================================+========================================================================================================================================================================+
+| ``torch.autograd.backward`` | ``torch::autograd::backward`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.autograd.grad`` | ``torch::autograd::grad`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.detach`` | ``torch::Tensor::detach`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.detach_`` | ``torch::Tensor::detach_`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.backward`` | ``torch::Tensor::backward`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.register_hook`` | ``torch::Tensor::register_hook`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.requires_grad`` | ``torch::Tensor::requires_grad_`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.retain_grad`` | ``torch::Tensor::retain_grad`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.grad`` | ``torch::Tensor::grad`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.grad_fn`` | ``torch::Tensor::grad_fn`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.set_data`` | ``torch::Tensor::set_data`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.data`` | ``torch::Tensor::data`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.output_nr`` | ``torch::Tensor::output_nr`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``torch.Tensor.is_leaf`` | ``torch::Tensor::is_leaf`` (`link `_) |
++--------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+After translation, most of your Python autograd code should just work in C++.
+If that's not the case, please file a bug report at `GitHub issues `_
+and we will fix it as soon as possible.
+
+Conclusion
+----------
+
+You should now have a good overview of PyTorch's C++ autograd API.
+You can find the code examples displayed in this note `here
+`_. As always, if you run into any
+problems or have questions, you can use our `forum `_
+or `GitHub issues `_ to get in touch.
diff --git a/advanced_source/cpp_custom_ops.rst b/advanced_source/cpp_custom_ops.rst
new file mode 100644
index 00000000000..512c39b2a68
--- /dev/null
+++ b/advanced_source/cpp_custom_ops.rst
@@ -0,0 +1,582 @@
+.. _cpp-custom-ops-tutorial:
+
+Custom C++ and CUDA Operators
+=============================
+
+**Author:** `Richard Zou `_
+
+.. grid:: 2
+
+ .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn
+ :class-card: card-prerequisites
+
+ * How to integrate custom operators written in C++/CUDA with PyTorch
+ * How to test custom operators using ``torch.library.opcheck``
+
+ .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites
+ :class-card: card-prerequisites
+
+ * PyTorch 2.4 or later
+ * Basic understanding of C++ and CUDA programming
+
+.. note::
+
+ This tutorial will also work on AMD ROCm with no additional modifications.
+
+PyTorch offers a large library of operators that work on Tensors (e.g. torch.add, torch.sum, etc).
+However, you may wish to bring a new custom operator to PyTorch. This tutorial demonstrates the
+blessed path to authoring a custom operator written in C++/CUDA.
+
+For our tutorial, we’ll demonstrate how to author a fused multiply-add C++
+and CUDA operator that composes with PyTorch subsystems. The semantics of
+the operation are as follows:
+
+.. code-block:: python
+
+ def mymuladd(a: Tensor, b: Tensor, c: float):
+ return a * b + c
+
+You can find the end-to-end working example for this tutorial
+`here `_ .
+
+Setting up the Build System
+---------------------------
+
+If you are developing custom C++/CUDA code, it must be compiled.
+Note that if you’re interfacing with a Python library that already has bindings
+to precompiled C++/CUDA code, you might consider writing a custom Python operator
+instead (:ref:`python-custom-ops-tutorial`).
+
+Use `torch.utils.cpp_extension `_
+to compile custom C++/CUDA code for use with PyTorch.
+C++ extensions may be built either "ahead of time" with setuptools, or "just in time"
+via `load_inline `_;
+we’ll focus on the "ahead of time" flavor.
+
+Using ``cpp_extension`` is as simple as writing the following ``setup.py``:
+
+.. code-block:: python
+
+ from setuptools import setup, Extension
+ from torch.utils import cpp_extension
+
+ setup(name="extension_cpp",
+ ext_modules=[
+ cpp_extension.CppExtension(
+ "extension_cpp",
+ ["muladd.cpp"],
+ # define Py_LIMITED_API with min version 3.9 to expose only the stable
+ # limited API subset from Python.h
+ extra_compile_args={"cxx": ["-DPy_LIMITED_API=0x03090000"]},
+ py_limited_api=True)], # Build 1 wheel across multiple Python versions
+ cmdclass={'build_ext': cpp_extension.BuildExtension},
+ options={"bdist_wheel": {"py_limited_api": "cp39"}} # 3.9 is minimum supported Python version
+ )
+
+If you need to compile CUDA code (for example, ``.cu`` files), then instead use
+`torch.utils.cpp_extension.CUDAExtension `_.
+Please see `extension-cpp `_ for an
+example for how this is set up.
+
+The above example represents what we refer to as a CPython agnostic wheel, meaning we are
+building a single wheel that can be run across multiple CPython versions (similar to pure
+Python packages). CPython agnosticism is desirable in minimizing the number of wheels your
+custom library needs to support and release. The minimum version we'd like to support is
+3.9, since it is the oldest supported version currently, so we use the corresponding hexcode
+and specifier throughout the setup code. We suggest building the extension in the same
+environment as the minimum CPython version you'd like to support to minimize unknown behavior,
+so, here, we build the extension in a CPython 3.9 environment. When built, this single wheel
+will be runnable in any CPython environment 3.9+. To achieve this, there are three key lines
+to note.
+
+The first is the specification of ``Py_LIMITED_API`` in ``extra_compile_args`` to the
+minimum CPython version you would like to support:
+
+.. code-block:: python
+
+ extra_compile_args={"cxx": ["-DPy_LIMITED_API=0x03090000"]},
+
+Defining the ``Py_LIMITED_API`` flag helps verify that the extension is in fact
+only using the `CPython Stable Limited API `_,
+which is a requirement for building a CPython agnostic wheel. If this requirement
+is not met, it is possible to build a wheel that looks CPython agnostic but will crash,
+or worse, be silently incorrect, in another CPython environment. Take care to avoid
+using unstable CPython APIs, for example APIs from libtorch_python (in particular
+pytorch/python bindings) and to only use APIs from libtorch (ATen objects, operators
+and the dispatcher). We strongly recommend defining the ``Py_LIMITED_API`` flag to
+help ascertain the extension is compliant and safe as a CPython agnostic wheel. Note that
+defining this flag is not a full guarantee that the built wheel is CPython agnostic, but
+it is better than the wild wild west. There are several caveats mentioned in the
+`Python docs `_,
+and you should test and verify yourself that the wheel is truly agnostic for the relevant
+CPython versions.
+
+The second and third lines specifying ``py_limited_api`` inform setuptools that you intend
+to build a CPython agnostic wheel and will influence the naming of the wheel accordingly:
+
+.. code-block:: python
+
+ setup(name="extension_cpp",
+ ext_modules=[
+ cpp_extension.CppExtension(
+ ...,
+ py_limited_api=True)], # Build 1 wheel across multiple Python versions
+ ...,
+ options={"bdist_wheel": {"py_limited_api": "cp39"}} # 3.9 is minimum supported Python version
+ )
+
+It is necessary to specify ``py_limited_api=True`` as an argument to CppExtension/
+CUDAExtension and also as an option to the ``"bdist_wheel"`` command with the minimal
+supported CPython version (in this case, 3.9). Consequently, the ``setup`` in our
+tutorial would build one properly named wheel that could be installed across multiple
+CPython versions ``>=3.9``.
+
+If your extension uses CPython APIs outside the stable limited set, then you cannot
+build a CPython agnostic wheel! You should build one wheel per CPython version instead,
+like so:
+
+.. code-block:: python
+
+ from setuptools import setup, Extension
+ from torch.utils import cpp_extension
+
+ setup(name="extension_cpp",
+ ext_modules=[
+ cpp_extension.CppExtension(
+ "extension_cpp",
+ ["muladd.cpp"])],
+ cmdclass={'build_ext': cpp_extension.BuildExtension},
+ )
+
+
+Defining the custom op and adding backend implementations
+---------------------------------------------------------
+First, let's write a C++ function that computes ``mymuladd``:
+
+.. code-block:: cpp
+
+ at::Tensor mymuladd_cpu(at::Tensor a, const at::Tensor& b, double c) {
+ TORCH_CHECK(a.sizes() == b.sizes());
+ TORCH_CHECK(a.dtype() == at::kFloat);
+ TORCH_CHECK(b.dtype() == at::kFloat);
+ TORCH_INTERNAL_ASSERT(a.device().type() == at::DeviceType::CPU);
+ TORCH_INTERNAL_ASSERT(b.device().type() == at::DeviceType::CPU);
+ at::Tensor a_contig = a.contiguous();
+ at::Tensor b_contig = b.contiguous();
+ at::Tensor result = torch::empty(a_contig.sizes(), a_contig.options());
+ const float* a_ptr = a_contig.data_ptr<float>();
+ const float* b_ptr = b_contig.data_ptr<float>();
+ float* result_ptr = result.data_ptr<float>();
+ for (int64_t i = 0; i < result.numel(); i++) {
+ result_ptr[i] = a_ptr[i] * b_ptr[i] + c;
+ }
+ return result;
+ }
+
+In order to use this from PyTorch’s Python frontend, we need to register it
+as a PyTorch operator using the ``TORCH_LIBRARY`` API. This will automatically
+bind the operator to Python.
+
+Operator registration is a two-step process:
+
+- **Defining the operator** - This step ensures that PyTorch is aware of the new operator.
+- **Registering backend implementations** - In this step, implementations for various
+ backends, such as CPU and CUDA, are associated with the operator.
+
+Defining an operator
+^^^^^^^^^^^^^^^^^^^^
+To define an operator, follow these steps:
+
+1. select a namespace for an operator. We recommend the namespace be the name of your top-level
+ project; we’ll use "extension_cpp" in our tutorial.
+2. provide a schema string that specifies the input/output types of the operator and whether
+   any input Tensors will be mutated. We support more types in addition to Tensor and float;
+ please see `The Custom Operators Manual `_
+ for more details.
+
+ * If you are authoring an operator that can mutate its input Tensors, please see here
+ (:ref:`mutable-ops`) for how to specify that.
+
+.. code-block:: cpp
+
+ TORCH_LIBRARY(extension_cpp, m) {
+ // Note that "float" in the schema corresponds to the C++ double type
+ // and the Python float type.
+ m.def("mymuladd(Tensor a, Tensor b, float c) -> Tensor");
+ }
+
+This makes the operator available from Python via ``torch.ops.extension_cpp.mymuladd``.
+
+Registering backend implementations for an operator
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Use ``TORCH_LIBRARY_IMPL`` to register a backend implementation for the operator.
+
+.. code-block:: cpp
+
+ TORCH_LIBRARY_IMPL(extension_cpp, CPU, m) {
+ m.impl("mymuladd", &mymuladd_cpu);
+ }
+
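+At this point the CPU version of the operator can be called from Python. A minimal
+sketch, assuming the extension built from this tutorial is importable as
+``extension_cpp`` (importing it loads the ``.so`` and runs the registrations above):
+
+.. code-block:: python
+
+    import torch
+    import extension_cpp  # loads the compiled extension and its registrations
+
+    a = torch.randn(3)
+    b = torch.randn(3)
+    out = torch.ops.extension_cpp.mymuladd(a, b, 1.0)
+    torch.testing.assert_close(out, a * b + 1.0)
+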
+If you also have a CUDA implementation of ``mymuladd``, you can register it
+in a separate ``TORCH_LIBRARY_IMPL`` block:
+
+.. code-block:: cpp
+
+ __global__ void muladd_kernel(int numel, const float* a, const float* b, float c, float* result) {
+ int idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (idx < numel) result[idx] = a[idx] * b[idx] + c;
+ }
+
+ at::Tensor mymuladd_cuda(const at::Tensor& a, const at::Tensor& b, double c) {
+ TORCH_CHECK(a.sizes() == b.sizes());
+ TORCH_CHECK(a.dtype() == at::kFloat);
+ TORCH_CHECK(b.dtype() == at::kFloat);
+ TORCH_INTERNAL_ASSERT(a.device().type() == at::DeviceType::CUDA);
+ TORCH_INTERNAL_ASSERT(b.device().type() == at::DeviceType::CUDA);
+ at::Tensor a_contig = a.contiguous();
+ at::Tensor b_contig = b.contiguous();
+ at::Tensor result = torch::empty(a_contig.sizes(), a_contig.options());
+ const float* a_ptr = a_contig.data_ptr<float>();
+ const float* b_ptr = b_contig.data_ptr<float>();
+ float* result_ptr = result.data_ptr<float>();
+
+ int numel = a_contig.numel();
+ muladd_kernel<<<(numel+255)/256, 256>>>(numel, a_ptr, b_ptr, c, result_ptr);
+ return result;
+ }
+
+ TORCH_LIBRARY_IMPL(extension_cpp, CUDA, m) {
+ m.impl("mymuladd", &mymuladd_cuda);
+ }
+
+Adding ``torch.compile`` support for an operator
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To add ``torch.compile`` support for an operator, we must add a FakeTensor kernel (also
+known as a "meta kernel" or "abstract impl"). FakeTensors are Tensors that have
+metadata (such as shape, dtype, device) but no data: the FakeTensor kernel for an
+operator specifies how to compute the metadata of output tensors given the metadata of input tensors.
+The FakeTensor kernel should return dummy Tensors of your choice with
+the correct Tensor metadata (shape/strides/``dtype``/device).
+
+We recommend that this be done from Python via the ``torch.library.register_fake`` API,
+though it is possible to do this from C++ as well (see
+`The Custom Operators Manual `_
+for more details).
+
+.. code-block:: python
+
+ # Important: the C++ custom operator definitions should be loaded first
+ # before calling ``torch.library`` APIs that add registrations for the
+ # C++ custom operator(s). The following import loads our
+ # C++ custom operator definitions.
+ # Note that if you are striving for Python agnosticism, you should use
+ # the ``load_library(...)`` API call instead. See the next section for
+ # more details.
+ from . import _C
+
+ @torch.library.register_fake("extension_cpp::mymuladd")
+ def _(a, b, c):
+ torch._check(a.shape == b.shape)
+ torch._check(a.dtype == torch.float)
+ torch._check(b.dtype == torch.float)
+ torch._check(a.device == b.device)
+ return torch.empty_like(a)
+
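+With the FakeTensor kernel registered, the operator composes with ``torch.compile``.
+A minimal sketch, assuming the extension from this tutorial is built and its
+registrations (including the fake kernel above) have been loaded:
+
+.. code-block:: python
+
+    import torch
+
+    @torch.compile(fullgraph=True)
+    def f(a, b):
+        return torch.ops.extension_cpp.mymuladd.default(a, b, 1.0)
+
+    a = torch.randn(3)
+    b = torch.randn(3)
+    torch.testing.assert_close(f(a, b), a * b + 1.0)
+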
+Setting up hybrid Python/C++ registration
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+In this tutorial, we defined a custom operator in C++, added CPU/CUDA
+implementations in C++, and added ``FakeTensor`` kernels and backward formulas
+in Python. The order in which these registrations are loaded (or imported)
+matters (importing in the wrong order will lead to an error).
+
+To use the custom operator with hybrid Python/C++ registrations, we must
+first load the C++ library that holds the custom operator definition
+and then call the ``torch.library`` registration APIs. This can happen in
+three ways:
+
+
+1. The first way to load the C++ library that holds the custom operator definition
+ is to define a dummy Python module for _C. Then, in Python, when you import the
+ module with ``import _C``, the ``.so`` files corresponding to the extension will
+ be loaded and the ``TORCH_LIBRARY`` and ``TORCH_LIBRARY_IMPL`` static initializers
+ will run. One can create a dummy Python module with ``PYBIND11_MODULE`` like below,
+ but you will notice that this does not compile with ``Py_LIMITED_API``, because
+ ``pybind11`` does not promise to only use the stable limited CPython API! With
+ the below code, you sadly cannot build a CPython agnostic wheel for your extension!
+ (Foreshadowing: I wonder what the second way is ;) ).
+
+.. code-block:: cpp
+
+ // in, say, not_agnostic/csrc/extension_BAD.cpp
+ #include <torch/extension.h>
+
+ PYBIND11_MODULE(_C, m) {}
+
+.. code-block:: python
+
+ # in, say, extension/__init__.py
+ from . import _C
+
+2. In this tutorial, because we value being able to build a single wheel across multiple
+ CPython versions, we will replace the unstable ``PYBIND11`` call with stable API calls.
+ The below code compiles with ``-DPy_LIMITED_API=0x03090000`` and successfully creates
+ a dummy Python module for our ``_C`` extension so that it can be imported from Python.
+ See `extension_cpp/__init__.py `_
+ and `extension_cpp/csrc/muladd.cpp `_
+ for more details:
+
+.. code-block:: cpp
+
+ #include <Python.h>
+
+ extern "C" {
+ /* Creates a dummy empty _C module that can be imported from Python.
+ The import from Python will load the .so consisting of this file
+ in this extension, so that the TORCH_LIBRARY static initializers
+ below are run. */
+ PyObject* PyInit__C(void)
+ {
+ static struct PyModuleDef module_def = {
+ PyModuleDef_HEAD_INIT,
+ "_C", /* name of module */
+ NULL, /* module documentation, may be NULL */
+ -1, /* size of per-interpreter state of the module,
+ or -1 if the module keeps state in global variables. */
+ NULL, /* methods */
+ };
+ return PyModule_Create(&module_def);
+ }
+ }
+
+.. code-block:: python
+
+ # in, say, extension/__init__.py
+ from . import _C
+
+3. If you want to avoid ``Python.h`` entirely in your C++ custom operator, you may
+ use ``torch.ops.load_library("/path/to/library.so")`` in Python to load the ``.so``
+ file(s) compiled from the extension. Note that, with this method, there is no ``_C``
+ Python module created for the extension so you cannot call ``import _C`` from Python.
+ Instead of relying on the import statement to trigger the custom operators to be
+ registered, ``torch.ops.load_library("/path/to/library.so")`` will do the trick.
+ The challenge then is shifted towards understanding where the ``.so`` files are
+ located so that you can load them, which is not always trivial:
+
+.. code-block:: python
+
+ import torch
+ from pathlib import Path
+
+ so_files = list(Path(__file__).parent.glob("_C*.so"))
+ assert (
+ len(so_files) == 1
+ ), f"Expected one _C*.so file, found {len(so_files)}"
+ torch.ops.load_library(so_files[0])
+
+ from . import ops
+
+
+Adding training (autograd) support for an operator
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Use ``torch.library.register_autograd`` to add training support for an operator. Prefer
+this over directly using Python ``torch.autograd.Function`` or C++ ``torch::autograd::Function``;
+you must use those in a very specific way to avoid silent incorrectness (see
+`The Custom Operators Manual `_
+for more details).
+
+.. code-block:: python
+
+ def _backward(ctx, grad):
+ a, b = ctx.saved_tensors
+ grad_a, grad_b = None, None
+ if ctx.needs_input_grad[0]:
+ grad_a = grad * b
+ if ctx.needs_input_grad[1]:
+ grad_b = grad * a
+ return grad_a, grad_b, None
+
+ def _setup_context(ctx, inputs, output):
+ a, b, c = inputs
+ saved_a, saved_b = None, None
+ if ctx.needs_input_grad[0]:
+ saved_b = b
+ if ctx.needs_input_grad[1]:
+ saved_a = a
+ ctx.save_for_backward(saved_a, saved_b)
+
+ # This code adds training support for the operator. You must provide us
+ # the backward formula for the operator and a `setup_context` function
+ # to save values to be used in the backward.
+ torch.library.register_autograd(
+ "extension_cpp::mymuladd", _backward, setup_context=_setup_context)
+
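+With this registration in place, gradients flow through ``mymuladd`` like any other
+PyTorch operation. A minimal sketch, assuming the registrations above have been
+loaded (for ``mymuladd(a, b, c) = a * b + c``, the gradient with respect to ``a``
+is ``grad * b`` and the gradient with respect to ``b`` is ``grad * a``):
+
+.. code-block:: python
+
+    import torch
+
+    a = torch.randn(3, requires_grad=True)
+    b = torch.randn(3, requires_grad=True)
+
+    out = torch.ops.extension_cpp.mymuladd.default(a, b, 1.0)
+    out.sum().backward()
+
+    # d(out.sum())/da == b and d(out.sum())/db == a
+    torch.testing.assert_close(a.grad, b.detach())
+    torch.testing.assert_close(b.grad, a.detach())
+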
+Note that the backward must be a composition of PyTorch-understood operators.
+If you wish to use another custom C++ or CUDA kernel in your backwards pass,
+it must be wrapped into a custom operator.
+
+If we had our own custom ``mymul`` kernel, we would need to wrap it into a
+custom operator and then call that from the backward:
+
+.. code-block:: cpp
+
+ // New! a mymul_cpu kernel
+ at::Tensor mymul_cpu(const at::Tensor& a, const at::Tensor& b) {
+ TORCH_CHECK(a.sizes() == b.sizes());
+ TORCH_CHECK(a.dtype() == at::kFloat);
+ TORCH_CHECK(b.dtype() == at::kFloat);
+ TORCH_CHECK(a.device().type() == at::DeviceType::CPU);
+ TORCH_CHECK(b.device().type() == at::DeviceType::CPU);
+ at::Tensor a_contig = a.contiguous();
+ at::Tensor b_contig = b.contiguous();
+ at::Tensor result = torch::empty(a_contig.sizes(), a_contig.options());
+ const float* a_ptr = a_contig.data_ptr<float>();
+ const float* b_ptr = b_contig.data_ptr<float>();
+ float* result_ptr = result.data_ptr<float>();
+ for (int64_t i = 0; i < result.numel(); i++) {
+ result_ptr[i] = a_ptr[i] * b_ptr[i];
+ }
+ return result;
+ }
+
+ TORCH_LIBRARY(extension_cpp, m) {
+ m.def("mymuladd(Tensor a, Tensor b, float c) -> Tensor");
+ // New! defining the mymul operator
+ m.def("mymul(Tensor a, Tensor b) -> Tensor");
+ }
+
+
+ TORCH_LIBRARY_IMPL(extension_cpp, CPU, m) {
+ m.impl("mymuladd", &mymuladd_cpu);
+ // New! registering the cpu kernel for the mymul operator
+ m.impl("mymul", &mymul_cpu);
+ }
+
+.. code-block:: python
+
+ def _backward(ctx, grad):
+ a, b = ctx.saved_tensors
+ grad_a, grad_b = None, None
+ if ctx.needs_input_grad[0]:
+ grad_a = torch.ops.extension_cpp.mymul.default(grad, b)
+ if ctx.needs_input_grad[1]:
+ grad_b = torch.ops.extension_cpp.mymul.default(grad, a)
+ return grad_a, grad_b, None
+
+
+ def _setup_context(ctx, inputs, output):
+ a, b, c = inputs
+ saved_a, saved_b = None, None
+ if ctx.needs_input_grad[0]:
+ saved_b = b
+ if ctx.needs_input_grad[1]:
+ saved_a = a
+ ctx.save_for_backward(saved_a, saved_b)
+
+
+ # This code adds training support for the operator. You must provide us
+ # the backward formula for the operator and a `setup_context` function
+ # to save values to be used in the backward.
+ torch.library.register_autograd(
+ "extension_cpp::mymuladd", _backward, setup_context=_setup_context)
+
+Testing an operator
+-------------------
+Use ``torch.library.opcheck`` to test that the custom op was registered correctly.
+Note that this function does not test that the gradients are mathematically correct
+-- plan to write separate tests for that, either manual ones or by using
+``torch.autograd.gradcheck``.
+
+.. code-block:: python
+
+ def sample_inputs(device, *, requires_grad=False):
+ def make_tensor(*size):
+ return torch.randn(size, device=device, requires_grad=requires_grad)
+
+ def make_nondiff_tensor(*size):
+ return torch.randn(size, device=device, requires_grad=False)
+
+ return [
+ [make_tensor(3), make_tensor(3), 1],
+ [make_tensor(20), make_tensor(20), 3.14],
+ [make_tensor(20), make_nondiff_tensor(20), -123],
+ [make_nondiff_tensor(2, 3), make_tensor(2, 3), -0.3],
+ ]
+
+ def reference_muladd(a, b, c):
+ return a * b + c
+
+ samples = sample_inputs(device, requires_grad=True)
+ samples.extend(sample_inputs(device, requires_grad=False))
+ for args in samples:
+ # Correctness test
+ result = torch.ops.extension_cpp.mymuladd(*args)
+ expected = reference_muladd(*args)
+ torch.testing.assert_close(result, expected)
+
+ # Use opcheck to check for incorrect usage of operator registration APIs
+ torch.library.opcheck(torch.ops.extension_cpp.mymuladd.default, args)
+
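+To also check that the gradients are mathematically correct, one option is a manual
+test against the reference implementation, as sketched below (``torch.autograd.gradcheck``
+is another option, but it works best with double-precision inputs while the sample
+kernels above only accept ``float32``):
+
+.. code-block:: python
+
+    a = torch.randn(8, requires_grad=True)
+    b = torch.randn(8, requires_grad=True)
+
+    # Gradients produced by the custom operator
+    torch.ops.extension_cpp.mymuladd.default(a, b, 0.5).sum().backward()
+    grad_a, grad_b = a.grad.clone(), b.grad.clone()
+
+    # Gradients produced by the pure-PyTorch reference
+    a.grad = None
+    b.grad = None
+    reference_muladd(a, b, 0.5).sum().backward()
+
+    torch.testing.assert_close(grad_a, a.grad)
+    torch.testing.assert_close(grad_b, b.grad)
+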
+.. _mutable-ops:
+
+Creating mutable operators
+--------------------------
+You may wish to author a custom operator that mutates its inputs. Use ``Tensor(a!)``
+to specify each mutable Tensor in the schema; otherwise, there will be undefined
+behavior. If there are multiple mutated Tensors, use different names (for example, ``Tensor(a!)``,
+``Tensor(b!)``, ``Tensor(c!)``) for each mutable Tensor.
+
+Let's author a ``myadd_out(a, b, out)`` operator, which writes the contents of ``a+b`` into ``out``.
+
+.. code-block:: cpp
+
+ // An example of an operator that mutates one of its inputs.
+ void myadd_out_cpu(const at::Tensor& a, const at::Tensor& b, at::Tensor& out) {
+ TORCH_CHECK(a.sizes() == b.sizes());
+ TORCH_CHECK(b.sizes() == out.sizes());
+ TORCH_CHECK(a.dtype() == at::kFloat);
+ TORCH_CHECK(b.dtype() == at::kFloat);
+ TORCH_CHECK(out.dtype() == at::kFloat);
+ TORCH_CHECK(out.is_contiguous());
+ TORCH_INTERNAL_ASSERT(a.device().type() == at::DeviceType::CPU);
+ TORCH_INTERNAL_ASSERT(b.device().type() == at::DeviceType::CPU);
+ TORCH_INTERNAL_ASSERT(out.device().type() == at::DeviceType::CPU);
+ at::Tensor a_contig = a.contiguous();
+ at::Tensor b_contig = b.contiguous();
+ const float* a_ptr = a_contig.data_ptr<float>();
+ const float* b_ptr = b_contig.data_ptr<float>();
+ float* result_ptr = out.data_ptr<float>();
+ for (int64_t i = 0; i < out.numel(); i++) {
+ result_ptr[i] = a_ptr[i] + b_ptr[i];
+ }
+ }
+
+When defining the operator, we must specify that it mutates the out Tensor in the schema:
+
+.. code-block:: cpp
+
+ TORCH_LIBRARY(extension_cpp, m) {
+ m.def("mymuladd(Tensor a, Tensor b, float c) -> Tensor");
+ m.def("mymul(Tensor a, Tensor b) -> Tensor");
+ // New!
+ m.def("myadd_out(Tensor a, Tensor b, Tensor(a!) out) -> ()");
+ }
+
+ TORCH_LIBRARY_IMPL(extension_cpp, CPU, m) {
+ m.impl("mymuladd", &mymuladd_cpu);
+ m.impl("mymul", &mymul_cpu);
+ // New!
+ m.impl("myadd_out", &myadd_out_cpu);
+ }
+
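+From Python, the output buffer is passed in explicitly and the operator returns nothing.
+A minimal sketch, assuming the extension has been rebuilt with the registrations above:
+
+.. code-block:: python
+
+    a = torch.randn(3)
+    b = torch.randn(3)
+    out = torch.empty(3)
+
+    # Writes a + b into ``out``
+    torch.ops.extension_cpp.myadd_out(a, b, out)
+    torch.testing.assert_close(out, a + b)
+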
+.. note::
+
+ Do not return any mutated Tensors as outputs of the operator as this will
+ cause incompatibility with PyTorch subsystems like ``torch.compile``.
+
+Conclusion
+----------
+In this tutorial, we went over the recommended approach to integrating Custom C++
+and CUDA operators with PyTorch. The ``TORCH_LIBRARY/torch.library`` APIs are fairly
+low-level. For more information about how to use the API, see
+`The Custom Operators Manual `_.
diff --git a/advanced_source/cpp_custom_ops_sycl.rst b/advanced_source/cpp_custom_ops_sycl.rst
new file mode 100644
index 00000000000..3b3ad069b58
--- /dev/null
+++ b/advanced_source/cpp_custom_ops_sycl.rst
@@ -0,0 +1,274 @@
+.. _cpp-custom-ops-tutorial-sycl:
+
+Custom SYCL Operators
+=====================
+
+.. grid:: 2
+
+ .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn
+ :class-card: card-prerequisites
+
+ * How to integrate custom operators written in SYCL with PyTorch
+
+ .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites
+ :class-card: card-prerequisites
+
+ * PyTorch 2.8 or later
+ * Basic understanding of SYCL programming
+
+.. note::
+
+ ``SYCL`` serves as the backend programming language for Intel GPUs (device label ``xpu``). For configuration details, see:
+ `Getting Started on Intel GPUs `_. The Intel Compiler, which comes bundled with Intel Deep Learning Essentials, handles ``SYCL`` compilation. Ensure you install and activate the compiler environment prior to executing the code examples in this tutorial.
+
+PyTorch offers a large library of operators that work on Tensors (e.g. torch.add, torch.sum, etc).
+However, you may wish to bring a new custom operator to PyTorch. This tutorial demonstrates the
+best path to authoring a custom operator written in SYCL. Tutorials for C++ and CUDA operators are available in the :ref:`cpp-custom-ops-tutorial`.
+
+Follow the structure to create a custom SYCL operator:
+
+.. code-block:: text
+
+ sycl_example/
+ ├── setup.py
+ ├── sycl_extension
+ │ ├── __init__.py
+ │ ├── muladd.sycl
+ │ └── ops.py
+ └── test_sycl_extension.py
+
+Setting up the Build System
+---------------------------
+
+If you need to compile **SYCL** code (for example, ``.sycl`` files), use `torch.utils.cpp_extension.SyclExtension `_.
+The setup process is very similar to C++/CUDA, except the compilation arguments need to be adjusted for SYCL.
+
+Using ``sycl_extension`` is as straightforward as writing the following ``setup.py``:
+
+.. code-block:: python
+
+ import os
+ import torch
+ import glob
+ from setuptools import find_packages, setup
+ from torch.utils.cpp_extension import SyclExtension, BuildExtension
+
+ library_name = "sycl_extension"
+ py_limited_api = True
+ extra_compile_args = {
+ "cxx": ["-O3",
+ "-fdiagnostics-color=always",
+ "-DPy_LIMITED_API=0x03090000"],
+ "sycl": ["-O3" ]
+ }
+
+ assert(torch.xpu.is_available()), "XPU is not available, please check your environment"
+ # Source files collection
+ this_dir = os.path.dirname(os.path.curdir)
+ extensions_dir = os.path.join(this_dir, library_name)
+ sources = list(glob.glob(os.path.join(extensions_dir, "*.sycl")))
+ # Construct extension
+ ext_modules = [
+ SyclExtension(
+ f"{library_name}._C",
+ sources,
+ extra_compile_args=extra_compile_args,
+ py_limited_api=py_limited_api,
+ )
+ ]
+ setup(
+ name=library_name,
+ packages=find_packages(),
+ ext_modules=ext_modules,
+ install_requires=["torch"],
+ description="Simple Example of PyTorch Sycl extensions",
+ cmdclass={"build_ext": BuildExtension},
+ options={"bdist_wheel": {"py_limited_api": "cp39"}} if py_limited_api else {},
+ )
+
+
+Defining the custom op and adding backend implementations
+---------------------------------------------------------
+First, let's write a SYCL kernel that computes ``mymuladd``.
+
+In order to use it from PyTorch’s Python frontend, we need to register it
+as a PyTorch operator using the ``TORCH_LIBRARY`` API. This will automatically
+bind the operator to Python. The SYCL implementation is then registered for
+the XPU backend in a separate ``TORCH_LIBRARY_IMPL`` block:
+
+.. code-block:: cpp
+
+ #include <c10/xpu/XPUStream.h>
+ #include <sycl/sycl.hpp>
+ #include <ATen/Operators.h>
+ #include <torch/all.h>
+ #include <torch/library.h>
+
+ namespace sycl_extension {
+ // MulAdd Kernel: result = a * b + c
+ static void muladd_kernel(
+ int numel, const float* a, const float* b, float c, float* result,
+ const sycl::nd_item<1>& item) {
+ int idx = item.get_global_id(0);
+ if (idx < numel) {
+ result[idx] = a[idx] * b[idx] + c;
+ }
+ }
+
+ class MulAddKernelFunctor {
+ public:
+ MulAddKernelFunctor(int _numel, const float* _a, const float* _b, float _c, float* _result)
+ : numel(_numel), a(_a), b(_b), c(_c), result(_result) {}
+ void operator()(const sycl::nd_item<1>& item) const {
+ muladd_kernel(numel, a, b, c, result, item);
+ }
+
+ private:
+ int numel;
+ const float* a;
+ const float* b;
+ float c;
+ float* result;
+ };
+
+ at::Tensor mymuladd_xpu(const at::Tensor& a, const at::Tensor& b, double c) {
+ TORCH_CHECK(a.sizes() == b.sizes(), "a and b must have the same shape");
+ TORCH_CHECK(a.dtype() == at::kFloat, "a must be a float tensor");
+ TORCH_CHECK(b.dtype() == at::kFloat, "b must be a float tensor");
+ TORCH_CHECK(a.device().is_xpu(), "a must be an XPU tensor");
+ TORCH_CHECK(b.device().is_xpu(), "b must be an XPU tensor");
+
+ at::Tensor a_contig = a.contiguous();
+ at::Tensor b_contig = b.contiguous();
+ at::Tensor result = at::empty_like(a_contig);
+
+ const float* a_ptr = a_contig.data_ptr<float>();
+ const float* b_ptr = b_contig.data_ptr<float>();
+ float* res_ptr = result.data_ptr<float>();
+ int numel = a_contig.numel();
+
+ sycl::queue& queue = c10::xpu::getCurrentXPUStream().queue();
+ constexpr int threads = 256;
+ int blocks = (numel + threads - 1) / threads;
+
+ queue.submit([&](sycl::handler& cgh) {
+ cgh.parallel_for(
+ sycl::nd_range<1>(blocks * threads, threads),
+ MulAddKernelFunctor(numel, a_ptr, b_ptr, static_cast<float>(c), res_ptr)
+ );
+ });
+
+ return result;
+ }
+ // Defines the operators
+ TORCH_LIBRARY(sycl_extension, m) {
+ m.def("mymuladd(Tensor a, Tensor b, float c) -> Tensor");
+ }
+
+ // ==================================================
+ // Register SYCL Implementations to Torch Library
+ // ==================================================
+ TORCH_LIBRARY_IMPL(sycl_extension, XPU, m) {
+ m.impl("mymuladd", &mymuladd_xpu);
+ }
+
+ } // namespace sycl_extension
+
+
+
+Create a Python Interface
+-------------------------
+
+Create a Python interface for our operator in the ``sycl_extension/ops.py`` file:
+
+.. code-block:: python
+
+ import torch
+ from torch import Tensor
+ __all__ = ["mymuladd"]
+
+ def mymuladd(a: Tensor, b: Tensor, c: float) -> Tensor:
+ """Performs a * b + c in an efficient fused kernel"""
+ return torch.ops.sycl_extension.mymuladd.default(a, b, c)
+
+Initialize Package
+------------------
+
+Create ``sycl_extension/__init__.py`` file to make the package importable:
+
+.. code-block:: python
+
+ import ctypes
+ from pathlib import Path
+
+ import torch
+
+ current_dir = Path(__file__).parent.parent
+ build_dir = current_dir / "build"
+ so_files = list(build_dir.glob("**/*.so"))
+
+ assert len(so_files) == 1, f"Expected one _C*.so file, found {len(so_files)}"
+
+ with torch._ops.dl_open_guard():
+ loaded_lib = ctypes.CDLL(so_files[0])
+
+ from . import ops
+
+ __all__ = [
+ "loaded_lib",
+ "ops",
+ ]
+
+Testing SYCL extension operator
+--------------------------------
+
+Use a simple test to verify that the operator works correctly.
+
+.. code-block:: python
+
+ import torch
+ from torch.testing._internal.common_utils import TestCase
+ import unittest
+ import sycl_extension
+
+ def reference_muladd(a, b, c):
+ return a * b + c
+
+ class TestMyMulAdd(TestCase):
+ def sample_inputs(self, device, *, requires_grad=False):
+ def make_tensor(*size):
+ return torch.randn(size, device=device, requires_grad=requires_grad)
+
+ def make_nondiff_tensor(*size):
+ return torch.randn(size, device=device, requires_grad=False)
+
+ return [
+ [make_tensor(3), make_tensor(3), 1],
+ [make_tensor(20), make_tensor(20), 3.14],
+ [make_tensor(20), make_nondiff_tensor(20), -123],
+ [make_nondiff_tensor(2, 3), make_tensor(2, 3), -0.3],
+ ]
+
+ def _test_correctness(self, device):
+ samples = self.sample_inputs(device)
+ for args in samples:
+ result = sycl_extension.ops.mymuladd(*args)
+ expected = reference_muladd(*args)
+ torch.testing.assert_close(result, expected)
+
+ @unittest.skipIf(not torch.xpu.is_available(), "requires Intel GPU")
+ def test_correctness_xpu(self):
+ self._test_correctness("xpu")
+
+ if __name__ == "__main__":
+ unittest.main()
+
+This test checks the correctness of the custom operator by comparing its output against a reference implementation.
+
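+For a quick interactive check outside of the test suite, the operator can also be
+called directly on XPU tensors. A minimal sketch, assuming an Intel GPU is available
+and the package built above is installed:
+
+.. code-block:: python
+
+    import torch
+    import sycl_extension
+
+    a = torch.randn(3, device="xpu")
+    b = torch.randn(3, device="xpu")
+    out = sycl_extension.ops.mymuladd(a, b, 1.0)
+    torch.testing.assert_close(out.cpu(), (a * b + 1.0).cpu())
+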
+Conclusion
+----------
+
+In this tutorial, we demonstrated how to implement and compile custom SYCL operators for PyTorch. We specifically showcased an inference operation ``muladd``. For adding backward support or enabling torch.compile compatibility, please refer to :ref:`cpp-custom-ops-tutorial`.
diff --git a/advanced_source/cpp_export.rst b/advanced_source/cpp_export.rst
new file mode 100644
index 00000000000..56c4bcbaae7
--- /dev/null
+++ b/advanced_source/cpp_export.rst
@@ -0,0 +1,3 @@
+.. warning::
+ TorchScript is deprecated, please use
+ `torch.export `__ instead.
\ No newline at end of file
diff --git a/advanced_source/cpp_frontend.rst b/advanced_source/cpp_frontend.rst
new file mode 100644
index 00000000000..968afa01b23
--- /dev/null
+++ b/advanced_source/cpp_frontend.rst
@@ -0,0 +1,1325 @@
+.. _cpp-frontend-tutorial:
+
+Using the PyTorch C++ Frontend
+==============================
+
+**Author:** `Peter Goldsborough `_
+
+.. grid:: 2
+
+ .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn
+ :class-card: card-prerequisites
+
+ * How to build a C++ application that utilizes the PyTorch C++ frontend
+ * How to define and train neural networks from C++ using PyTorch abstractions
+
+ .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites
+ :class-card: card-prerequisites
+
+ * PyTorch 1.5 or later
+ * Basic understanding of C++ programming
+ * Basic Ubuntu Linux environment with CMake >= 3.5; similar commands will work in a MacOS / Windows environment
+ * (Optional) A CUDA-based GPU for the GPU training sections
+
+The PyTorch C++ frontend is a pure C++ interface to the PyTorch machine learning
+framework. While the primary interface to PyTorch naturally is Python, this
+Python API sits atop a substantial C++ codebase providing foundational data
+structures and functionality such as tensors and automatic differentiation. The
+C++ frontend exposes a pure C++17 API that extends this underlying C++ codebase
+with tools required for machine learning training and inference. This includes a
+built-in collection of common components for neural network modeling; an API to
+extend this collection with custom modules; a library of popular optimization
+algorithms such as stochastic gradient descent; a parallel data loader with an
+API to define and load datasets; serialization routines and more.
+
+This tutorial will walk you through an end-to-end example of training a model
+with the C++ frontend. Concretely, we will be training a `DCGAN
+`_ -- a kind of generative model -- to
+generate images of MNIST digits. While conceptually a simple example, it should
+be enough to give you a whirlwind overview of the PyTorch C++ frontend and whet
+your appetite for training more complex models. We will begin with some
+motivating words for why you would want to use the C++ frontend to begin with,
+and then dive straight into defining and training our model.
+
+.. tip::
+
+ Watch `this lightning talk from CppCon 2018
+ `_ for a quick (and humorous)
+ presentation on the C++ frontend.
+
+.. tip::
+
+ `This note `_ provides a sweeping
+ overview of the C++ frontend's components and design philosophy.
+
+.. tip::
+
+ Documentation for the PyTorch C++ ecosystem is available at
+ https://pytorch.org/cppdocs. There you can find high level descriptions as
+ well as API-level documentation.
+
+Motivation
+----------
+
+Before we embark on our exciting journey of GANs and MNIST digits, let's take a
+step back and discuss why you would want to use the C++ frontend instead of the
+Python one to begin with. We (the PyTorch team) created the C++ frontend to
+enable research in environments in which Python cannot be used, or is simply not
+the right tool for the job. Examples of such environments include:
+
+- **Low Latency Systems**: You may want to do reinforcement learning research in
+ a pure C++ game engine with high frames-per-second and low latency
+ requirements. Using a pure C++ library is a much better fit to such an
+ environment than a Python library. Python may not be tractable at all because
+ of the slowness of the Python interpreter.
+- **Highly Multithreaded Environments**: Due to the Global Interpreter Lock
+ (GIL), Python cannot run more than one system thread at a time.
+ Multiprocessing is an alternative, but not as scalable and has significant
+ shortcomings. C++ has no such constraints and threads are easy to use and
+ create. Models requiring heavy parallelization, like those used in `Deep
+ Neuroevolution `_, can benefit from
+ this.
+- **Existing C++ Codebases**: You may be the owner of an existing C++
+ application doing anything from serving web pages in a backend server to
+ rendering 3D graphics in photo editing software, and wish to integrate
+ machine learning methods into your system. The C++ frontend allows you to
+ remain in C++ and spare yourself the hassle of binding back and forth between
+ Python and C++, while retaining much of the flexibility and intuitiveness of
+ the traditional PyTorch (Python) experience.
+
+The C++ frontend is not intended to compete with the Python frontend. It is
+meant to complement it. We know researchers and engineers alike love PyTorch for
+its simplicity, flexibility and intuitive API. Our goal is to make sure you can
+take advantage of these core design principles in every possible environment,
+including the ones described above. If one of these scenarios describes your use
+case well, or if you are simply interested or curious, follow along as we
+explore the C++ frontend in detail in the following paragraphs.
+
+.. tip::
+
+ The C++ frontend tries to provide an API as close as possible to that of the
+ Python frontend. If you are experienced with the Python frontend and ever ask
+ yourself "how do I do X with the C++ frontend?", write your code the way you
+ would in Python, and more often than not the same functions and methods will
+ be available in C++ as in Python (just remember to replace dots with double
+ colons).
+
+Writing a Basic Application
+---------------------------
+
+Let's begin by writing a minimal C++ application to verify that we're on the
+same page regarding our setup and build environment. First, you will need to
+grab a copy of the *LibTorch* distribution -- our ready-built zip archive that
+packages all relevant headers, libraries and CMake build files required to use
+the C++ frontend. The LibTorch distribution is available for download on the
+`PyTorch website `_ for Linux, MacOS
+and Windows. The rest of this tutorial will assume a basic Ubuntu Linux
+environment, however you are free to follow along on MacOS or Windows too.
+
+.. tip::
+
+ The note on `Installing C++ Distributions of PyTorch
+ `_ describes the following steps
+ in more detail.
+
+.. tip::
+ On Windows, debug and release builds are not ABI-compatible. If you plan to
+ build your project in debug mode, please try the debug version of LibTorch.
+ Also, make sure you specify the correct configuration in the ``cmake --build .``
+ line below.
+
+The first step is to download the LibTorch distribution locally, via the link
+retrieved from the PyTorch website. For a vanilla Ubuntu Linux environment, this
+means running:
+
+.. code-block:: shell
+
+ # If you need e.g. CUDA 9.0 support, please replace "cpu" with "cu90" in the URL below.
+ wget https://download.pytorch.org/libtorch/nightly/cpu/libtorch-shared-with-deps-latest.zip
+ unzip libtorch-shared-with-deps-latest.zip
+
+Next, let's write a tiny C++ file called ``dcgan.cpp`` that includes
+``torch/torch.h`` and for now simply prints out a three by three identity
+matrix:
+
+.. code-block:: cpp
+
+ #include <torch/torch.h>
+ #include <iostream>
+
+ int main() {
+ torch::Tensor tensor = torch::eye(3);
+ std::cout << tensor << std::endl;
+ }
+
+To build this tiny application as well as our full-fledged training script later
+on we'll use this ``CMakeLists.txt`` file:
+
+.. code-block:: cmake
+
+ cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
+ project(dcgan)
+
+ find_package(Torch REQUIRED)
+
+ add_executable(dcgan dcgan.cpp)
+ target_link_libraries(dcgan "${TORCH_LIBRARIES}")
+ set_property(TARGET dcgan PROPERTY CXX_STANDARD 17)
+
+.. note::
+
+ While CMake is the recommended build system for LibTorch, it is not a hard
+ requirement. You can also use Visual Studio project files, QMake, plain
+ Makefiles or any other build environment you feel comfortable with. However,
+ we do not provide out-of-the-box support for this.
+
+Make note of line 4 in the above CMake file: ``find_package(Torch REQUIRED)``.
+This instructs CMake to find the build configuration for the LibTorch library.
+In order for CMake to know *where* to find these files, we must set the
+``CMAKE_PREFIX_PATH`` when invoking ``cmake``. Before we do this, let's agree on
+the following directory structure for our ``dcgan`` application:
+
+.. code-block:: shell
+
+ dcgan/
+ CMakeLists.txt
+ dcgan.cpp
+
+Further, I will refer to the path to the unzipped LibTorch distribution as
+``/path/to/libtorch``. Note that this **must be an absolute path**. In
+particular, setting ``CMAKE_PREFIX_PATH`` to something like ``../../libtorch``
+will break in unexpected ways. Instead, write ``$PWD/../../libtorch`` to get the
+corresponding absolute path. Now, we are ready to build our application:
+
+.. code-block:: shell
+
+ root@fa350df05ecf:/home# mkdir build
+ root@fa350df05ecf:/home# cd build
+ root@fa350df05ecf:/home/build# cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch ..
+ -- The C compiler identification is GNU 5.4.0
+ -- The CXX compiler identification is GNU 5.4.0
+ -- Check for working C compiler: /usr/bin/cc
+ -- Check for working C compiler: /usr/bin/cc -- works
+ -- Detecting C compiler ABI info
+ -- Detecting C compiler ABI info - done
+ -- Detecting C compile features
+ -- Detecting C compile features - done
+ -- Check for working CXX compiler: /usr/bin/c++
+ -- Check for working CXX compiler: /usr/bin/c++ -- works
+ -- Detecting CXX compiler ABI info
+ -- Detecting CXX compiler ABI info - done
+ -- Detecting CXX compile features
+ -- Detecting CXX compile features - done
+ -- Looking for pthread.h
+ -- Looking for pthread.h - found
+ -- Looking for pthread_create
+ -- Looking for pthread_create - not found
+ -- Looking for pthread_create in pthreads
+ -- Looking for pthread_create in pthreads - not found
+ -- Looking for pthread_create in pthread
+ -- Looking for pthread_create in pthread - found
+ -- Found Threads: TRUE
+ -- Found torch: /path/to/libtorch/lib/libtorch.so
+ -- Configuring done
+ -- Generating done
+ -- Build files have been written to: /home/build
+ root@fa350df05ecf:/home/build# cmake --build . --config Release
+ Scanning dependencies of target dcgan
+ [ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
+ [100%] Linking CXX executable dcgan
+ [100%] Built target dcgan
+
+Above, we first created a ``build`` folder inside of our ``dcgan`` directory,
+entered this folder, ran the ``cmake`` command to generate the necessary build
+(Make) files and finally compiled the project successfully by running ``cmake
+--build . --config Release``. We are now all set to execute our minimal binary
+and complete this section on basic project configuration:
+
+.. code-block:: shell
+
+ root@fa350df05ecf:/home/build# ./dcgan
+ 1 0 0
+ 0 1 0
+ 0 0 1
+ [ Variable[CPUFloatType]{3,3} ]
+
+Looks like an identity matrix to me!
+
+Defining the Neural Network Models
+----------------------------------
+
+Now that we have our basic environment configured, we can dive into the much
+more interesting parts of this tutorial. First, we will discuss how to define
+and interact with modules in the C++ frontend. We'll begin with basic,
+small-scale example modules and then implement a full-fledged GAN using the
+extensive library of built-in modules provided by the C++ frontend.
+
+Module API Basics
+^^^^^^^^^^^^^^^^^
+
+In line with the Python interface, neural networks based on the C++ frontend are
+composed of reusable building blocks called *modules*. There is a base module
+class from which all other modules are derived. In Python, this class is
+``torch.nn.Module`` and in C++ it is ``torch::nn::Module``. Besides a
+``forward()`` method that implements the algorithm the module encapsulates, a
+module usually contains any of three kinds of sub-objects: parameters, buffers
+and submodules.
+
+Parameters and buffers store state in form of tensors. Parameters record
+gradients, while buffers do not. Parameters are usually the trainable weights of
+your neural network. Examples of buffers include means and variances for batch
+normalization. In order to re-use particular blocks of logic and state, the
+PyTorch API allows modules to be nested. A nested module is termed a
+*submodule*.
+
+Parameters, buffers and submodules must be explicitly registered. Once
+registered, methods like ``parameters()`` or ``buffers()`` can be used to
+retrieve a container of all parameters in the entire (nested) module hierarchy.
+Similarly, methods like ``to(...)``, where e.g. ``to(torch::kCUDA)`` moves all
+parameters and buffers from CPU to CUDA memory, work on the entire module
+hierarchy.
+
+Defining a Module and Registering Parameters
+********************************************
+
+To put these words into code, let's consider this simple module written in the
+Python interface:
+
+.. code-block:: python
+
+ import torch
+
+ class Net(torch.nn.Module):
+ def __init__(self, N, M):
+ super(Net, self).__init__()
+ self.W = torch.nn.Parameter(torch.randn(N, M))
+ self.b = torch.nn.Parameter(torch.randn(M))
+
+ def forward(self, input):
+ return torch.addmm(self.b, input, self.W)
+
+
+In C++, it would look like this:
+
+.. code-block:: cpp
+
+ #include <torch/torch.h>
+
+ struct Net : torch::nn::Module {
+ Net(int64_t N, int64_t M) {
+ W = register_parameter("W", torch::randn({N, M}));
+ b = register_parameter("b", torch::randn(M));
+ }
+ torch::Tensor forward(torch::Tensor input) {
+ return torch::addmm(b, input, W);
+ }
+ torch::Tensor W, b;
+ };
+
+Just like in Python, we define a class called ``Net`` (for simplicity here a
+``struct`` instead of a ``class``) and derive it from the module base class.
+Inside the constructor, we create tensors using ``torch::randn`` just like we
+use ``torch.randn`` in Python. One interesting difference is how we register the
+parameters. In Python, we wrap the tensors with the ``torch.nn.Parameter``
+class, while in C++ we have to pass the tensor through the
+``register_parameter`` method instead. The reason for this is that the Python
+API can detect that an attribute is of type ``torch.nn.Parameter`` and
+automatically registers such tensors. In C++, reflection is very limited, so a
+more traditional (and less magical) approach is provided.
+
+Registering Submodules and Traversing the Module Hierarchy
+**********************************************************
+
+In the same way we can register parameters, we can also register submodules. In
+Python, submodules are automatically detected and registered when they are
+assigned as an attribute of a module:
+
+.. code-block:: python
+
+ class Net(torch.nn.Module):
+ def __init__(self, N, M):
+ super(Net, self).__init__()
+ # Registered as a submodule behind the scenes
+ self.linear = torch.nn.Linear(N, M)
+ self.another_bias = torch.nn.Parameter(torch.rand(M))
+
+ def forward(self, input):
+ return self.linear(input) + self.another_bias
+
+This allows us, for example, to use the ``parameters()`` method to recursively
+access all parameters in our module hierarchy:
+
+.. code-block:: python
+
+ >>> net = Net(4, 5)
+ >>> print(list(net.parameters()))
+ [Parameter containing:
+ tensor([0.0808, 0.8613, 0.2017, 0.5206, 0.5353], requires_grad=True), Parameter containing:
+ tensor([[-0.3740, -0.0976, -0.4786, -0.4928],
+ [-0.1434, 0.4713, 0.1735, -0.3293],
+ [-0.3467, -0.3858, 0.1980, 0.1986],
+ [-0.1975, 0.4278, -0.1831, -0.2709],
+ [ 0.3730, 0.4307, 0.3236, -0.0629]], requires_grad=True), Parameter containing:
+ tensor([ 0.2038, 0.4638, -0.2023, 0.1230, -0.0516], requires_grad=True)]
+
+To register submodules in C++, use the aptly named ``register_module()`` method
+to register a module like ``torch::nn::Linear``:
+
+.. code-block:: cpp
+
+ struct Net : torch::nn::Module {
+ Net(int64_t N, int64_t M)
+ : linear(register_module("linear", torch::nn::Linear(N, M))) {
+ another_bias = register_parameter("b", torch::randn(M));
+ }
+ torch::Tensor forward(torch::Tensor input) {
+ return linear(input) + another_bias;
+ }
+ torch::nn::Linear linear;
+ torch::Tensor another_bias;
+ };
+
+.. tip::
+
+ You can find the full list of available built-in modules like
+ ``torch::nn::Linear``, ``torch::nn::Dropout`` or ``torch::nn::Conv2d`` in the
+ documentation of the ``torch::nn`` namespace `here
+ `_.
+
+One subtlety about the above code is why the submodule was created in the
+constructor's initializer list, while the parameter was created inside the
+constructor body. There is a good reason for this, which we'll touch upon
+in the section on the C++ frontend's *ownership model* further below. The end
+result, however, is that we can recursively access our module tree's parameters
+just like in Python. Calling ``parameters()`` returns a
+``std::vector<torch::Tensor>``, which we can iterate over:
+
+.. code-block:: cpp
+
+ int main() {
+ Net net(4, 5);
+ for (const auto& p : net.parameters()) {
+ std::cout << p << std::endl;
+ }
+ }
+
+which prints:
+
+.. code-block:: shell
+
+ root@fa350df05ecf:/home/build# ./dcgan
+ 0.0345
+ 1.4456
+ -0.6313
+ -0.3585
+ -0.4008
+ [ Variable[CPUFloatType]{5} ]
+ -0.1647 0.2891 0.0527 -0.0354
+ 0.3084 0.2025 0.0343 0.1824
+ -0.4630 -0.2862 0.2500 -0.0420
+ 0.3679 -0.1482 -0.0460 0.1967
+ 0.2132 -0.1992 0.4257 0.0739
+ [ Variable[CPUFloatType]{5,4} ]
+ 0.01 *
+ 3.6861
+ -10.1166
+ -45.0333
+ 7.9983
+ -20.0705
+ [ Variable[CPUFloatType]{5} ]
+
+with three parameters just like in Python. To also see the names of these
+parameters, the C++ API provides a ``named_parameters()`` method which returns
+an ``OrderedDict`` just like in Python:
+
+.. code-block:: cpp
+
+ Net net(4, 5);
+ for (const auto& pair : net.named_parameters()) {
+ std::cout << pair.key() << ": " << pair.value() << std::endl;
+ }
+
+which we can execute again to see the output:
+
+.. code-block:: shell
+
+ root@fa350df05ecf:/home/build# make && ./dcgan 11:13:48
+ Scanning dependencies of target dcgan
+ [ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
+ [100%] Linking CXX executable dcgan
+ [100%] Built target dcgan
+ b: -0.1863
+ -0.8611
+ -0.1228
+ 1.3269
+ 0.9858
+ [ Variable[CPUFloatType]{5} ]
+ linear.weight: 0.0339 0.2484 0.2035 -0.2103
+ -0.0715 -0.2975 -0.4350 -0.1878
+ -0.3616 0.1050 -0.4982 0.0335
+ -0.1605 0.4963 0.4099 -0.2883
+ 0.1818 -0.3447 -0.1501 -0.0215
+ [ Variable[CPUFloatType]{5,4} ]
+ linear.bias: -0.0250
+ 0.0408
+ 0.3756
+ -0.2149
+ -0.3636
+ [ Variable[CPUFloatType]{5} ]
+
+.. note::
+
+ `The documentation
+ `_
+ for ``torch::nn::Module`` contains the full list of methods that operate on
+ the module hierarchy.
+
+Running the Network in Forward Mode
+***********************************
+
+To execute the network in C++, we simply call the ``forward()`` method we
+defined ourselves:
+
+.. code-block:: cpp
+
+ int main() {
+ Net net(4, 5);
+ std::cout << net.forward(torch::ones({2, 4})) << std::endl;
+ }
+
+which prints something like:
+
+.. code-block:: shell
+
+ root@fa350df05ecf:/home/build# ./dcgan
+ 0.8559 1.1572 2.1069 -0.1247 0.8060
+ 0.8559 1.1572 2.1069 -0.1247 0.8060
+ [ Variable[CPUFloatType]{2,5} ]
+
+Module Ownership
+****************
+
+At this point, we know how to define a module in C++, register parameters,
+register submodules, traverse the module hierarchy via methods like
+``parameters()`` and finally run the module's ``forward()`` method. While there
+are many more methods, classes and topics to devour in the C++ API, I will refer
+you to `docs `_ for
+the full menu. We'll also touch upon some more concepts as we implement the
+DCGAN model and end-to-end training pipeline in just a second. Before we do so,
+let me briefly touch upon the *ownership model* the C++ frontend provides for
+subclasses of ``torch::nn::Module``.
+
+For this discussion, the ownership model refers to the way modules are stored
+and passed around -- which determines who or what *owns* a particular module
+instance. In Python, objects are always allocated dynamically (on the heap) and
+have reference semantics. This is very easy to work with and straightforward to
+understand. In fact, in Python, you can largely forget about where objects live
+and how they get referenced, and focus on getting things done.
+
+C++, being a lower level language, provides more options in this realm. This
+increases complexity and heavily influences the design and ergonomics of the C++
+frontend. In particular, for modules in the C++ frontend, we have the option of
+using *either* value semantics *or* reference semantics. The first case is the
+simplest and was shown in the examples thus far: module objects are allocated on
+the stack and when passed to a function, can be either copied, moved (with
+``std::move``) or taken by reference or by pointer:
+
+.. code-block:: cpp
+
+ struct Net : torch::nn::Module { };
+
+ void a(Net net) { }
+ void b(Net& net) { }
+ void c(Net* net) { }
+
+ int main() {
+ Net net;
+ a(net);
+ a(std::move(net));
+ b(net);
+ c(&net);
+ }
+
+For the second case -- reference semantics -- we can use ``std::shared_ptr``.
+The advantage of reference semantics is that, like in Python, it reduces the
+cognitive overhead of thinking about how modules must be passed to functions and
+how arguments must be declared (assuming you use ``shared_ptr`` everywhere).
+
+.. code-block:: cpp
+
+ struct Net : torch::nn::Module {};
+
+ void a(std::shared_ptr<Net> net) { }
+
+ int main() {
+ auto net = std::make_shared<Net>();
+ a(net);
+ }
+
+In our experience, researchers coming from dynamic languages greatly prefer
+reference semantics over value semantics, even though the latter is more
+"native" to C++. It is also important to note that ``torch::nn::Module``'s
+design, in order to stay close to the ergonomics of the Python API, relies on
+shared ownership. For example, take our earlier (here shortened) definition of
+``Net``:
+
+.. code-block:: cpp
+
+ struct Net : torch::nn::Module {
+ Net(int64_t N, int64_t M)
+ : linear(register_module("linear", torch::nn::Linear(N, M)))
+ { }
+ torch::nn::Linear linear;
+ };
+
+In order to use the ``linear`` submodule, we want to store it directly in our
+class. However, we also want the module base class to know about and have access
+to this submodule. For this, it must store a reference to this submodule. At
+this point, we have already arrived at the need for shared ownership. Both the
+``torch::nn::Module`` class and concrete ``Net`` class require a reference to
+the submodule. For this reason, the base class stores modules as
+``shared_ptr``\s, and therefore the concrete class must too.
+
+But wait! I don't see any mention of ``shared_ptr`` in the above code! Why is
+that? Well, because ``std::shared_ptr<MyModule>`` is a hell of a lot to type. To
+keep our researchers productive, we came up with an elaborate scheme to hide the
+mention of ``shared_ptr`` -- a benefit usually reserved for value semantics --
+while retaining reference semantics. To understand how this works, we can take a
+look at a simplified definition of the ``torch::nn::Linear`` module in the core
+library (the full definition is `here
+`_):
+
+.. code-block:: cpp
+
+ struct LinearImpl : torch::nn::Module {
+ LinearImpl(int64_t in, int64_t out);
+
+ Tensor forward(const Tensor& input);
+
+ Tensor weight, bias;
+ };
+
+ TORCH_MODULE(Linear);
+
+In brief: the module is not called ``Linear``, but ``LinearImpl``. A macro,
+``TORCH_MODULE`` then defines the actual ``Linear`` class. This "generated"
+class is effectively a wrapper over a ``std::shared_ptr<LinearImpl>``. It is a
+wrapper instead of a simple typedef so that, among other things, constructors
+still work as expected, i.e. you can still write ``torch::nn::Linear(3, 4)``
+instead of ``std::make_shared<LinearImpl>(3, 4)``. We call the class created by
+the macro the module *holder*. Like with (shared) pointers, you access the
+underlying object using the arrow operator (like ``model->forward(...)``). The
+end result is an ownership model that resembles that of the Python API quite
+closely. Reference semantics become the default, but without the extra typing of
+``std::shared_ptr`` or ``std::make_shared``. For our ``Net``, using the module
+holder API looks like this:
+
+.. code-block:: cpp
+
+ struct NetImpl : torch::nn::Module {};
+ TORCH_MODULE(Net);
+
+ void a(Net net) { }
+
+ int main() {
+ Net net;
+ a(net);
+ }
+
+There is one subtle issue that deserves mention here. A default constructed
+``std::shared_ptr`` is "empty", i.e. contains a null pointer. What is a default
+constructed ``Linear`` or ``Net``? Well, it's a tricky choice. We could say it
+should be an empty (null) ``std::shared_ptr``. However, recall that
+``Linear(3, 4)`` is the same as ``std::make_shared<LinearImpl>(3, 4)``. This
+means that if we had decided that ``Linear linear;`` should be a null pointer,
+then there would be no way to construct a module that does not take any
+constructor arguments, or defaults all of them. For this reason, in the current
+API, a default constructed module holder (like ``Linear()``) invokes the
+default constructor of the underlying module (``LinearImpl()``). If the
+underlying module does not have a default constructor, you get a compiler error.
+To instead construct the empty holder, you can pass ``nullptr`` to the
+constructor of the holder.
+
+In practice, this means you can use submodules either like shown earlier, where
+the module is registered and constructed in the *initializer list*:
+
+.. code-block:: cpp
+
+ struct Net : torch::nn::Module {
+ Net(int64_t N, int64_t M)
+ : linear(register_module("linear", torch::nn::Linear(N, M)))
+ { }
+ torch::nn::Linear linear;
+ };
+
+or you can first construct the holder with a null pointer and then assign to it
+in the constructor (more familiar for Pythonistas):
+
+.. code-block:: cpp
+
+ struct Net : torch::nn::Module {
+ Net(int64_t N, int64_t M) {
+ linear = register_module("linear", torch::nn::Linear(N, M));
+ }
+ torch::nn::Linear linear{nullptr}; // construct an empty holder
+ };
+
+In conclusion: Which ownership model -- which semantics -- should you use? The
+C++ frontend's API best supports the ownership model provided by module holders.
+The only disadvantage of this mechanism is one extra line of boilerplate below
+the module declaration. That said, the simplest model is still the value
+semantics model shown in the introduction to C++ modules. For small, simple
+scripts, you may get away with it too. But you'll find sooner or later that, for
+technical reasons, it is not always supported. For example, the serialization
+API (``torch::save`` and ``torch::load``) only supports module holders (or plain
+``shared_ptr``). As such, the module holder API is the recommended way of
+defining modules with the C++ frontend, and we will use this API in this
+tutorial henceforth.
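+
+As a quick illustration, saving and restoring a module defined through the
+holder API is then a one-liner each way. This is only a small sketch reusing
+the ``NetImpl``/``Net`` pair from the example above; the filename is arbitrary:
+
+.. code-block:: cpp
+
+   Net net;                      // default-constructs the underlying NetImpl
+   torch::save(net, "net.pt");   // accepted because Net is a module holder
+   torch::load(net, "net.pt");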
+
+Defining the DCGAN Modules
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We now have the necessary background and introduction to define the modules for
+the machine learning task we want to solve in this post. To recap: our task is
+to generate images of digits from the `MNIST dataset
+`_. We want to use a `generative adversarial
+network (GAN)
+`_ to solve
+this task. In particular, we'll use a `DCGAN architecture
+`_ -- one of the first and simplest of its
+kind, but entirely sufficient for this task.
+
+.. tip::
+
+ You can find the full source code presented in this tutorial `in this
+ repository `_.
+
+What was a GAN aGAN?
+********************
+
+A GAN consists of two distinct neural network models: a *generator* and a
+*discriminator*. The generator receives samples from a noise distribution, and
+its aim is to transform each noise sample into an image that resembles those of
+a target distribution -- in our case the MNIST dataset. The discriminator in
+turn receives either *real* images from the MNIST dataset, or *fake* images from
+the generator. It is asked to emit a probability judging how real (closer to
+``1``) or fake (closer to ``0``) a particular image is. Feedback from the
+discriminator on how real the images produced by the generator are is used to
+train the generator. Feedback on how good of an eye for authenticity the
+discriminator has is used to optimize the discriminator. In theory, a delicate
+balance between the generator and discriminator makes them improve in tandem,
+leading to the generator producing images indistinguishable from the target
+distribution, fooling the discriminator's (by then) excellent eye into emitting
+a probability of ``0.5`` for both real and fake images. For us, the end result
+is a machine that receives noise as input and generates realistic images of
+digits as its output.
+
+The Generator Module
+********************
+
+We begin by defining the generator module, which consists of a series of
+transposed 2D convolutions, batch normalizations and ReLU activation units.
+We explicitly pass inputs (in a functional way) between modules in the
+``forward()`` method of a module we define ourselves:
+
+.. code-block:: cpp
+
+ struct DCGANGeneratorImpl : nn::Module {
+ DCGANGeneratorImpl(int kNoiseSize)
+ : conv1(nn::ConvTranspose2dOptions(kNoiseSize, 256, 4)
+ .bias(false)),
+ batch_norm1(256),
+ conv2(nn::ConvTranspose2dOptions(256, 128, 3)
+ .stride(2)
+ .padding(1)
+ .bias(false)),
+ batch_norm2(128),
+ conv3(nn::ConvTranspose2dOptions(128, 64, 4)
+ .stride(2)
+ .padding(1)
+ .bias(false)),
+ batch_norm3(64),
+ conv4(nn::ConvTranspose2dOptions(64, 1, 4)
+ .stride(2)
+ .padding(1)
+ .bias(false))
+ {
+ // register_module() is needed if we want to use the parameters() method later on
+ register_module("conv1", conv1);
+ register_module("conv2", conv2);
+ register_module("conv3", conv3);
+ register_module("conv4", conv4);
+ register_module("batch_norm1", batch_norm1);
+ register_module("batch_norm2", batch_norm2);
+ register_module("batch_norm3", batch_norm3);
+ }
+
+ torch::Tensor forward(torch::Tensor x) {
+ x = torch::relu(batch_norm1(conv1(x)));
+ x = torch::relu(batch_norm2(conv2(x)));
+ x = torch::relu(batch_norm3(conv3(x)));
+ x = torch::tanh(conv4(x));
+ return x;
+ }
+
+ nn::ConvTranspose2d conv1, conv2, conv3, conv4;
+ nn::BatchNorm2d batch_norm1, batch_norm2, batch_norm3;
+ };
+ TORCH_MODULE(DCGANGenerator);
+
+ DCGANGenerator generator(kNoiseSize);
+
+We can now invoke ``forward()`` on the ``DCGANGenerator`` to map a noise sample to an image.
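+
+For example (a small sketch; the batch size of ``64`` and the ``kNoiseSize``
+value of ``100`` are simply the values used later in this tutorial):
+
+.. code-block:: cpp
+
+   // A batch of 64 noise vectors, each of shape {kNoiseSize, 1, 1}.
+   torch::Tensor noise = torch::randn({64, kNoiseSize, 1, 1});
+   torch::Tensor fake_images = generator->forward(noise);
+   // fake_images has shape {64, 1, 28, 28}: one 28x28 image per noise sample.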
+
+The particular modules chosen, like ``nn::ConvTranspose2d`` and ``nn::BatchNorm2d``,
+follow the structure outlined earlier. The ``kNoiseSize`` constant determines
+the size of the input noise vector and is set to ``100``. Hyperparameters were,
+of course, found via grad student descent.
+
+.. attention::
+
+ No grad students were harmed in the discovery of hyperparameters. They were
+ fed Soylent regularly.
+
+.. note::
+
+ A brief word on the way options are passed to built-in modules like ``Conv2d``
+ in the C++ frontend: Every module has some required options, like the number
+ of features for ``BatchNorm2d``. If you only need to configure the required
+ options, you can pass them directly to the module's constructor, like
+ ``BatchNorm2d(128)`` or ``Dropout(0.5)`` or ``Conv2d(8, 4, 2)`` (for input
+ channel count, output channel count, and kernel size). If, however, you need
+ to modify other options, which are normally defaulted, such as ``bias``
+ for ``Conv2d``, you need to construct and pass an *options* object. Every
+ module in the C++ frontend has an associated options struct, called
+ ``ModuleOptions`` where ``Module`` is the name of the module, like
+ ``LinearOptions`` for ``Linear``. This is what we do for the ``Conv2d``
+ modules above.
+
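+As a small illustrative sketch of the two styles (``torch::nn::Linear`` is just
+an example here; ``LinearOptions`` and its ``bias`` option are the actual names
+used by the C++ frontend):
+
+.. code-block:: cpp
+
+   // Only the required options: in_features and out_features.
+   torch::nn::Linear plain(10, 5);
+
+   // Changing a defaulted option requires going through the options struct.
+   torch::nn::Linear no_bias(torch::nn::LinearOptions(10, 5).bias(false));
+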
+The Discriminator Module
+************************
+
+The discriminator is similarly a sequence of convolutions, batch normalizations
+and activations. However, the convolutions are now regular ones instead of
+transposed, and we use a leaky ReLU with an alpha value of 0.2 instead of a
+vanilla ReLU. Also, the final activation becomes a Sigmoid, which squashes
+values into a range between 0 and 1. We can then interpret these squashed values
+as the probabilities the discriminator assigns to images being real.
+
+To build the discriminator, we will try something different: a `Sequential` module.
+Like in Python, PyTorch here provides two APIs for model definition: a functional one
+where inputs are passed through successive functions (e.g. the generator module example),
+and a more object-oriented one where we build a `Sequential` module containing the
+entire model as submodules. Using `Sequential`, the discriminator would look like:
+
+.. code-block:: cpp
+
+ nn::Sequential discriminator(
+ // Layer 1
+ nn::Conv2d(
+ nn::Conv2dOptions(1, 64, 4).stride(2).padding(1).bias(false)),
+ nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
+ // Layer 2
+ nn::Conv2d(
+ nn::Conv2dOptions(64, 128, 4).stride(2).padding(1).bias(false)),
+ nn::BatchNorm2d(128),
+ nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
+ // Layer 3
+ nn::Conv2d(
+ nn::Conv2dOptions(128, 256, 4).stride(2).padding(1).bias(false)),
+ nn::BatchNorm2d(256),
+ nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
+ // Layer 4
+ nn::Conv2d(
+ nn::Conv2dOptions(256, 1, 3).stride(1).padding(0).bias(false)),
+ nn::Sigmoid());
+
+.. tip::
+
+ A ``Sequential`` module simply performs function composition. The output of
+ the first submodule becomes the input of the second, the output of the third
+ becomes the input of the fourth and so on.
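+
+As a quick sanity check, we can already push a dummy batch through the
+discriminator (a small sketch; the ``28x28`` input size matches the MNIST
+images used below):
+
+.. code-block:: cpp
+
+   // A batch containing a single one-channel 28x28 image.
+   torch::Tensor dummy = torch::randn({1, 1, 28, 28});
+   torch::Tensor prob = discriminator->forward(dummy);
+   // prob has shape {1, 1, 1, 1} and holds the probability that the image is real.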
+
+
+Loading Data
+------------
+
+Now that we have defined the generator and discriminator model, we need some
+data we can train these models with. The C++ frontend, like the Python one,
+comes with a powerful parallel data loader. This data loader can read batches of
+data from a dataset (which you can define yourself) and provides many
+configuration knobs.
+
+.. note::
+
+ While the Python data loader uses multi-processing, the C++ data loader is truly
+ multi-threaded and does not launch any new processes.
+
+The data loader is part of the C++ frontend's ``data`` API, contained in the
+``torch::data::`` namespace. This API consists of a few different components:
+
+- The data loader class,
+- An API for defining datasets (a short sketch of a custom dataset follows this list),
+- An API for defining *transforms*, which can be applied to datasets,
+- An API for defining *samplers*, which produce the indices with which datasets are indexed,
+- A library of existing datasets, transforms and samplers.
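+
+For the dataset-definition API mentioned in the list above, here is a minimal
+sketch of what a custom in-memory dataset could look like. The ``TensorPairs``
+name and its member layout are purely illustrative and not part of this
+tutorial's code:
+
+.. code-block:: cpp
+
+   // Sketch: a dataset over pre-loaded input/target tensor pairs.
+   struct TensorPairs : torch::data::datasets::Dataset<TensorPairs> {
+     torch::Tensor inputs_, targets_;
+
+     TensorPairs(torch::Tensor inputs, torch::Tensor targets)
+         : inputs_(std::move(inputs)), targets_(std::move(targets)) {}
+
+     // Return the index-th example as a {data, target} pair.
+     torch::data::Example<> get(size_t index) override {
+       return {inputs_[index], targets_[index]};
+     }
+
+     // The number of examples in the dataset.
+     torch::optional<size_t> size() const override {
+       return inputs_.size(0);
+     }
+   };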
+
+For this tutorial, we can use the ``MNIST`` dataset that comes with the C++
+frontend. Let's instantiate a ``torch::data::datasets::MNIST`` for this, and
+apply two transformations: First, we normalize the images so that they are in
+the range of ``-1`` to ``+1`` (from an original range of ``0`` to ``1``).
+Second, we apply the ``Stack`` *collation*, which takes a batch of tensors and
+stacks them into a single tensor along the first dimension:
+
+.. code-block:: cpp
+
+ auto dataset = torch::data::datasets::MNIST("./mnist")
+ .map(torch::data::transforms::Normalize<>(0.5, 0.5))
+ .map(torch::data::transforms::Stack<>());
+
+Note that the MNIST dataset should be located in the ``./mnist`` directory
+relative to wherever you execute the training binary from. You can use `this
+script `_
+to download the MNIST dataset.
+
+Next, we create a data loader and pass it this dataset. To make a new data
+loader, we use ``torch::data::make_data_loader``, which returns a
+``std::unique_ptr`` of the correct type (which depends on the type of the
+dataset, the type of the sampler and some other implementation details):
+
+.. code-block:: cpp
+
+ auto data_loader = torch::data::make_data_loader(std::move(dataset));
+
+The data loader does come with a lot of options. You can inspect the full set
+`here
+`_.
+For example, to speed up the data loading, we can increase the number of
+workers. The default number is zero, which means the main thread will be used.
+If we set ``workers`` to ``2``, two threads will be spawned that load data
+concurrently. We should also increase the batch size from its default of ``1``
+to something more reasonable, like ``64`` (the value of ``kBatchSize``). So
+let's create a ``DataLoaderOptions`` object and set the appropriate properties:
+
+.. code-block:: cpp
+
+ auto data_loader = torch::data::make_data_loader(
+ std::move(dataset),
+ torch::data::DataLoaderOptions().batch_size(kBatchSize).workers(2));
+
+
+We can now write a loop to load batches of data, which we'll only print to the
+console for now:
+
+.. code-block:: cpp
+
+ for (torch::data::Example<>& batch : *data_loader) {
+ std::cout << "Batch size: " << batch.data.size(0) << " | Labels: ";
+ for (int64_t i = 0; i < batch.data.size(0); ++i) {
+ std::cout << batch.target[i].item<int64_t>() << " ";
+ }
+ std::cout << std::endl;
+ }
+
+The type returned by the data loader in this case is a ``torch::data::Example``.
+This type is a simple struct with a ``data`` field for the data and a ``target``
+field for the label. Because we applied the ``Stack`` collation earlier, the
+data loader returns only a single such example. If we had not applied the
+collation, the data loader would yield ``std::vector<torch::data::Example<>>``
+instead, with one element per example in the batch.
+
+If you rebuild and run this code, you should see something like this:
+
+.. code-block:: shell
+
+ root@fa350df05ecf:/home/build# make
+ Scanning dependencies of target dcgan
+ [ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
+ [100%] Linking CXX executable dcgan
+ [100%] Built target dcgan
+ root@fa350df05ecf:/home/build# make
+ [100%] Built target dcgan
+ root@fa350df05ecf:/home/build# ./dcgan
+ Batch size: 64 | Labels: 5 2 6 7 2 1 6 7 0 1 6 2 3 6 9 1 8 4 0 6 5 3 3 0 4 6 6 6 4 0 8 6 0 6 9 2 4 0 2 8 6 3 3 2 9 2 0 1 4 2 3 4 8 2 9 9 3 5 8 0 0 7 9 9
+ Batch size: 64 | Labels: 2 2 4 7 1 2 8 8 6 9 0 2 2 9 3 6 1 3 8 0 4 4 8 8 8 9 2 6 4 7 1 5 0 9 7 5 4 3 5 4 1 2 8 0 7 1 9 6 1 6 5 3 4 4 1 2 3 2 3 5 0 1 6 2
+ Batch size: 64 | Labels: 4 5 4 2 1 4 8 3 8 3 6 1 5 4 3 6 2 2 5 1 3 1 5 0 8 2 1 5 3 2 4 4 5 9 7 2 8 9 2 0 6 7 4 3 8 3 5 8 8 3 0 5 8 0 8 7 8 5 5 6 1 7 8 0
+ Batch size: 64 | Labels: 3 3 7 1 4 1 6 1 0 3 6 4 0 2 5 4 0 4 2 8 1 9 6 5 1 6 3 2 8 9 2 3 8 7 4 5 9 6 0 8 3 0 0 6 4 8 2 5 4 1 8 3 7 8 0 0 8 9 6 7 2 1 4 7
+ Batch size: 64 | Labels: 3 0 5 5 9 8 3 9 8 9 5 9 5 0 4 1 2 7 7 2 0 0 5 4 8 7 7 6 1 0 7 9 3 0 6 3 2 6 2 7 6 3 3 4 0 5 8 8 9 1 9 2 1 9 4 4 9 2 4 6 2 9 4 0
+ Batch size: 64 | Labels: 9 6 7 5 3 5 9 0 8 6 6 7 8 2 1 9 8 8 1 1 8 2 0 7 1 4 1 6 7 5 1 7 7 4 0 3 2 9 0 6 6 3 4 4 8 1 2 8 6 9 2 0 3 1 2 8 5 6 4 8 5 8 6 2
+ Batch size: 64 | Labels: 9 3 0 3 6 5 1 8 6 0 1 9 9 1 6 1 7 7 4 4 4 7 8 8 6 7 8 2 6 0 4 6 8 2 5 3 9 8 4 0 9 9 3 7 0 5 8 2 4 5 6 2 8 2 5 3 7 1 9 1 8 2 2 7
+ Batch size: 64 | Labels: 9 1 9 2 7 2 6 0 8 6 8 7 7 4 8 6 1 1 6 8 5 7 9 1 3 2 0 5 1 7 3 1 6 1 0 8 6 0 8 1 0 5 4 9 3 8 5 8 4 8 0 1 2 6 2 4 2 7 7 3 7 4 5 3
+ Batch size: 64 | Labels: 8 8 3 1 8 6 4 2 9 5 8 0 2 8 6 6 7 0 9 8 3 8 7 1 6 6 2 7 7 4 5 5 2 1 7 9 5 4 9 1 0 3 1 9 3 9 8 8 5 3 7 5 3 6 8 9 4 2 0 1 2 5 4 7
+ Batch size: 64 | Labels: 9 2 7 0 8 4 4 2 7 5 0 0 6 2 0 5 9 5 9 8 8 9 3 5 7 5 4 7 3 0 5 7 6 5 7 1 6 2 8 7 6 3 2 6 5 6 1 2 7 7 0 0 5 9 0 0 9 1 7 8 3 2 9 4
+ Batch size: 64 | Labels: 7 6 5 7 7 5 2 2 4 9 9 4 8 7 4 8 9 4 5 7 1 2 6 9 8 5 1 2 3 6 7 8 1 1 3 9 8 7 9 5 0 8 5 1 8 7 2 6 5 1 2 0 9 7 4 0 9 0 4 6 0 0 8 6
+ ...
+
+Which means we are successfully able to load data from the MNIST dataset.
+
+Writing the Training Loop
+-------------------------
+
+Let's now finish the algorithmic part of our example and implement the delicate
+dance between the generator and discriminator. First, we'll create two
+optimizers, one for the generator and one for the discriminator. The optimizers
+we use implement the `Adam `_ algorithm:
+
+.. code-block:: cpp
+
+ torch::optim::Adam generator_optimizer(
+ generator->parameters(), torch::optim::AdamOptions(2e-4).betas(std::make_tuple(0.5, 0.5)));
+ torch::optim::Adam discriminator_optimizer(
+ discriminator->parameters(), torch::optim::AdamOptions(5e-4).betas(std::make_tuple(0.5, 0.5)));
+
+.. note::
+
+ As of this writing, the C++ frontend provides optimizers implementing Adagrad,
+ Adam, LBFGS, RMSprop and SGD. The `docs
+ `_ have the
+ up-to-date list.
+
+Next, we need to update our training loop. We'll add an outer loop to exhaust
+the data loader every epoch and then write the GAN training code:
+
+.. code-block:: cpp
+
+ for (int64_t epoch = 1; epoch <= kNumberOfEpochs; ++epoch) {
+ int64_t batch_index = 0;
+ for (torch::data::Example<>& batch : *data_loader) {
+ // Train discriminator with real images.
+ discriminator->zero_grad();
+ torch::Tensor real_images = batch.data;
+ torch::Tensor real_labels = torch::empty(batch.data.size(0)).uniform_(0.8, 1.0);
+ torch::Tensor real_output = discriminator->forward(real_images).reshape(real_labels.sizes());
+ torch::Tensor d_loss_real = torch::binary_cross_entropy(real_output, real_labels);
+ d_loss_real.backward();
+
+ // Train discriminator with fake images.
+ torch::Tensor noise = torch::randn({batch.data.size(0), kNoiseSize, 1, 1});
+ torch::Tensor fake_images = generator->forward(noise);
+ torch::Tensor fake_labels = torch::zeros(batch.data.size(0));
+ torch::Tensor fake_output = discriminator->forward(fake_images.detach()).reshape(fake_labels.sizes());
+ torch::Tensor d_loss_fake = torch::binary_cross_entropy(fake_output, fake_labels);
+ d_loss_fake.backward();
+
+ torch::Tensor d_loss = d_loss_real + d_loss_fake;
+ discriminator_optimizer.step();
+
+ // Train generator.
+ generator->zero_grad();
+ fake_labels.fill_(1);
+ fake_output = discriminator->forward(fake_images).reshape(fake_labels.sizes());
+ torch::Tensor g_loss = torch::binary_cross_entropy(fake_output, fake_labels);
+ g_loss.backward();
+ generator_optimizer.step();
+
+ std::printf(
+ "\r[%2ld/%2ld][%3ld/%3ld] D_loss: %.4f | G_loss: %.4f",
+ epoch,
+ kNumberOfEpochs,
+ ++batch_index,
+ batches_per_epoch,
+ d_loss.item<float>(),
+ g_loss.item<float>());
+ }
+ }
+
+Above, we first evaluate the discriminator on real images, for which it should
+assign a high probability. For this, we use
+``torch::empty(batch.data.size(0)).uniform_(0.8, 1.0)`` as the target
+probabilities.
+
+.. note::
+
+ We pick random values uniformly distributed between 0.8 and 1.0 instead of 1.0
+ everywhere in order to make the discriminator training more robust. This trick
+ is called *label smoothing*.
+
+Before evaluating the discriminator, we zero out the gradients of its
+parameters. After computing the loss, we back-propagate it through the network by
+calling ``d_loss.backward()`` to compute new gradients. We repeat this spiel for
+the fake images. Instead of using images from the dataset, we let the generator
+create fake images for this by feeding it a batch of random noise. We then
+forward those fake images to the discriminator. This time, we want the
+discriminator to emit low probabilities, ideally all zeros. Once we have
+computed the discriminator loss for both the batch of real and the batch of fake
+images, we can progress the discriminator's optimizer by one step in order to
+update its parameters.
+
+To train the generator, we again first zero its gradients, and then re-evaluate
+the discriminator on the fake images. However, this time we want the
+discriminator to assign probabilities very close to one, which would indicate
+that the generator can produce images that fool the discriminator into thinking
+they are actually real (from the dataset). For this, we fill the ``fake_labels``
+tensor with all ones. We finally step the generator's optimizer to also update
+its parameters.
+
+We should now be ready to train our model on the CPU. We don't have any code yet
+to capture state or sample outputs, but we'll add this in just a moment. For
+now, let's just observe that our model is doing *something* -- we'll later
+verify based on the generated images whether this something is meaningful.
+Re-building and running should print something like:
+
+.. code-block:: shell
+
+ root@3c0711f20896:/home/build# make && ./dcgan
+ Scanning dependencies of target dcgan
+ [ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
+ [100%] Linking CXX executable dcgan
+ [100%] Built target dcgan
+ [ 1/10][100/938] D_loss: 0.6876 | G_loss: 4.1304
+ [ 1/10][200/938] D_loss: 0.3776 | G_loss: 4.3101
+ [ 1/10][300/938] D_loss: 0.3652 | G_loss: 4.6626
+ [ 1/10][400/938] D_loss: 0.8057 | G_loss: 2.2795
+ [ 1/10][500/938] D_loss: 0.3531 | G_loss: 4.4452
+ [ 1/10][600/938] D_loss: 0.3501 | G_loss: 5.0811
+ [ 1/10][700/938] D_loss: 0.3581 | G_loss: 4.5623
+ [ 1/10][800/938] D_loss: 0.6423 | G_loss: 1.7385
+ [ 1/10][900/938] D_loss: 0.3592 | G_loss: 4.7333
+ [ 2/10][100/938] D_loss: 0.4660 | G_loss: 2.5242
+ [ 2/10][200/938] D_loss: 0.6364 | G_loss: 2.0886
+ [ 2/10][300/938] D_loss: 0.3717 | G_loss: 3.8103
+ [ 2/10][400/938] D_loss: 1.0201 | G_loss: 1.3544
+ [ 2/10][500/938] D_loss: 0.4522 | G_loss: 2.6545
+ ...
+
+Moving to the GPU
+-----------------
+
+While our current script can run just fine on the CPU, we all know convolutions
+are a lot faster on GPU. Let's quickly discuss how we can move our training onto
+the GPU. We'll need to do two things for this: pass a GPU device specification
+to tensors we allocate ourselves, and explicitly copy any other tensors onto the
+GPU via the ``to()`` method all tensors and modules in the C++ frontend have.
+The simplest way to achieve both is to create an instance of ``torch::Device``
+at the top level of our training script, and then pass that device to tensor
+factory functions like ``torch::zeros`` as well as the ``to()`` method. We can
+start by doing this with a CPU device:
+
+.. code-block:: cpp
+
+ // Place this somewhere at the top of your training script.
+ torch::Device device(torch::kCPU);
+
+New tensor allocations like
+
+.. code-block:: cpp
+
+ torch::Tensor fake_labels = torch::zeros(batch.data.size(0));
+
+should be updated to take the ``device`` as the last argument:
+
+.. code-block:: cpp
+
+ torch::Tensor fake_labels = torch::zeros(batch.data.size(0), device);
+
+For tensors whose creation is not in our hands, like those coming from the MNIST
+dataset, we must insert explicit ``to()`` calls. This means
+
+.. code-block:: cpp
+
+ torch::Tensor real_images = batch.data;
+
+becomes
+
+.. code-block:: cpp
+
+ torch::Tensor real_images = batch.data.to(device);
+
+and also our model parameters should be moved to the correct device:
+
+.. code-block:: cpp
+
+ generator->to(device);
+ discriminator->to(device);
+
+.. note::
+
+ If a tensor already lives on the device supplied to ``to()``, the call is a
+ no-op. No extra copy is made.
+
+At this point, we've just made our previous CPU-residing code more explicit.
+However, it is now also very easy to change the device to a CUDA device:
+
+.. code-block:: cpp
+
+ torch::Device device(torch::kCUDA);
+
+And now all tensors will live on the GPU, calling into fast CUDA kernels for all
+operations, without us having to change any downstream code. If we wanted to
+specify a particular device index, it could be passed as the second argument to
+the ``Device`` constructor. If we wanted different tensors to live on different
+devices, we could pass separate device instances (for example one on CUDA device
+0 and the other on CUDA device 1). We can even do this configuration
+dynamically, which is often useful to make our training scripts more portable:
+
+.. code-block:: cpp
+
+ torch::Device device = torch::kCPU;
+ if (torch::cuda::is_available()) {
+ std::cout << "CUDA is available! Training on GPU." << std::endl;
+ device = torch::kCUDA;
+ }
+
+or even
+
+.. code-block:: cpp
+
+ torch::Device device(torch::cuda::is_available() ? torch::kCUDA : torch::kCPU);
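+
+If you have multiple GPUs, a particular one can be selected by passing its
+index as the second constructor argument. This is a small sketch; the device
+indices below are just an illustration:
+
+.. code-block:: cpp
+
+   torch::Device first_gpu(torch::kCUDA, 0);
+   torch::Device second_gpu(torch::kCUDA, 1);
+   // Tensors created with different devices end up on different GPUs.
+   torch::Tensor a = torch::zeros({2, 2}, first_gpu);
+   torch::Tensor b = torch::zeros({2, 2}, second_gpu);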
+
+Checkpointing and Recovering the Training State
+-----------------------------------------------
+
+The last augmentation we should make to our training script is to periodically
+save the state of our model parameters, the state of our optimizers as well as a
+few generated image samples. If our computer were to crash in the middle of the
+training procedure, the first two will allow us to restore the training state.
+For long-lasting training sessions, this is absolutely essential. Fortunately,
+the C++ frontend provides an API to serialize and deserialize both model and
+optimizer state, as well as individual tensors.
+
+The core API for this is ``torch::save(thing, filename)`` and
+``torch::load(thing, filename)``, where ``thing`` could be a
+``torch::nn::Module`` subclass or an optimizer instance like the ``Adam`` object
+we have in our training script. Let's update our training loop to checkpoint the
+model and optimizer state at a certain interval:
+
+.. code-block:: cpp
+
+ if (batch_index % kCheckpointEvery == 0) {
+ // Checkpoint the model and optimizer state.
+ torch::save(generator, "generator-checkpoint.pt");
+ torch::save(generator_optimizer, "generator-optimizer-checkpoint.pt");
+ torch::save(discriminator, "discriminator-checkpoint.pt");
+ torch::save(discriminator_optimizer, "discriminator-optimizer-checkpoint.pt");
+ // Sample the generator and save the images.
+ torch::Tensor samples = generator->forward(torch::randn({8, kNoiseSize, 1, 1}, device));
+ torch::save((samples + 1.0) / 2.0, torch::str("dcgan-sample-", checkpoint_counter, ".pt"));
+ std::cout << "\n-> checkpoint " << ++checkpoint_counter << '\n';
+ }
+
+where ``kCheckpointEvery`` is an integer set to something like ``100`` to
+checkpoint every ``100`` batches, and ``checkpoint_counter`` is a counter bumped
+every time we make a checkpoint.
+
+To restore the training state, you can add lines like these after all models and
+optimizers are created, but before the training loop:
+
+.. code-block:: cpp
+
+ torch::optim::Adam generator_optimizer(
+ generator->parameters(), torch::optim::AdamOptions(2e-4).betas(std::make_tuple(0.5, 0.5)));
+ torch::optim::Adam discriminator_optimizer(
+ discriminator->parameters(), torch::optim::AdamOptions(2e-4).betas(std::make_tuple(0.5, 0.5)));
+
+ if (kRestoreFromCheckpoint) {
+ torch::load(generator, "generator-checkpoint.pt");
+ torch::load(generator_optimizer, "generator-optimizer-checkpoint.pt");
+ torch::load(discriminator, "discriminator-checkpoint.pt");
+ torch::load(
+ discriminator_optimizer, "discriminator-optimizer-checkpoint.pt");
+ }
+
+ int64_t checkpoint_counter = 0;
+ for (int64_t epoch = 1; epoch <= kNumberOfEpochs; ++epoch) {
+ int64_t batch_index = 0;
+ for (torch::data::Example<>& batch : *data_loader) {
+
+
+Inspecting Generated Images
+---------------------------
+
+Our training script is now complete. We are ready to train our GAN, whether on
+CPU or GPU. To inspect the intermediary output of our training procedure, for
+which we added code to periodically save image samples to the
+``"dcgan-sample-xxx.pt"`` file, we can write a tiny Python script to load the
+tensors and display them with matplotlib:
+
+.. code-block:: python
+
+ import argparse
+
+ import matplotlib.pyplot as plt
+ import torch
+
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-i", "--sample-file", required=True)
+ parser.add_argument("-o", "--out-file", default="out.png")
+ parser.add_argument("-d", "--dimension", type=int, default=3)
+ options = parser.parse_args()
+
+ module = torch.jit.load(options.sample_file)
+ images = list(module.parameters())[0]
+
+ for index in range(options.dimension * options.dimension):
+ image = images[index].detach().cpu().reshape(28, 28).mul(255).to(torch.uint8)
+ array = image.numpy()
+ axis = plt.subplot(options.dimension, options.dimension, 1 + index)
+ plt.imshow(array, cmap="gray")
+ axis.get_xaxis().set_visible(False)
+ axis.get_yaxis().set_visible(False)
+
+ plt.savefig(options.out_file)
+ print("Saved ", options.out_file)
+
+Let's now train our model for around 30 epochs:
+
+.. code-block:: shell
+
+ root@3c0711f20896:/home/build# make && ./dcgan 10:17:57
+ Scanning dependencies of target dcgan
+ [ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
+ [100%] Linking CXX executable dcgan
+ [100%] Built target dcgan
+ CUDA is available! Training on GPU.
+ [ 1/30][200/938] D_loss: 0.4953 | G_loss: 4.0195
+ -> checkpoint 1
+ [ 1/30][400/938] D_loss: 0.3610 | G_loss: 4.8148
+ -> checkpoint 2
+ [ 1/30][600/938] D_loss: 0.4072 | G_loss: 4.36760
+ -> checkpoint 3
+ [ 1/30][800/938] D_loss: 0.4444 | G_loss: 4.0250
+ -> checkpoint 4
+ [ 2/30][200/938] D_loss: 0.3761 | G_loss: 3.8790
+ -> checkpoint 5
+ [ 2/30][400/938] D_loss: 0.3977 | G_loss: 3.3315
+ ...
+ -> checkpoint 120
+ [30/30][938/938] D_loss: 0.3610 | G_loss: 3.8084
+
+And display the images in a plot:
+
+.. code-block:: shell
+
+ root@3c0711f20896:/home/build# python display.py -i dcgan-sample-100.pt
+ Saved out.png
+
+Which should look something like this:
+
+.. figure:: /_static/img/cpp-frontend/digits.png
+ :alt: digits
+
+Digits! Hooray! Now the ball is in your court: can you improve the model to make
+the digits look even better?
+
+Conclusion
+----------
+
+This tutorial has hopefully given you a digestible digest of the PyTorch C++
+frontend. A machine learning library like PyTorch by necessity has a very broad
+and extensive API. As such, there are many concepts we did not have time or
+space to discuss here. However, I encourage you to try out the API, and consult
+`our documentation `_ and in particular the
+`Library API `_ section when
+you get stuck. Also, remember that you can expect the C++ frontend to follow the
+design and semantics of the Python frontend whenever we could make this
+possible, so you can leverage this fact to increase your learning rate.
+
+.. tip::
+
+ You can find the full source code presented in this tutorial `in this
+ repository `_.
+
+As always, if you run into any problems or have questions, you can use our
+`forum `_ or `GitHub issues
+`_ to get in touch.
diff --git a/advanced_source/custom_class_pt2.rst b/advanced_source/custom_class_pt2.rst
new file mode 100644
index 00000000000..229a94f2ce9
--- /dev/null
+++ b/advanced_source/custom_class_pt2.rst
@@ -0,0 +1,275 @@
+Supporting Custom C++ Classes in torch.compile/torch.export
+===========================================================
+
+
+This tutorial is a follow-on to the
+:doc:`custom C++ classes ` tutorial, and
+introduces additional steps that are needed to support custom C++ classes in
+torch.compile/torch.export.
+
+.. warning::
+
+ This feature is in prototype status and is subject to backwards compatibility
+ breaking changes. This tutorial provides a snapshot as of PyTorch 2.8. If
+ you run into any issues, please reach out to us on Github!
+
+Concretely, there are a few steps:
+
+1. Implement an ``__obj_flatten__`` method to the C++ custom class
+ implementation to allow us to inspect its states and guard the changes. The
+ method should return a tuple of ``(attribute_name, value)`` pairs
+ (``tuple[tuple[str, value] * n]``).
+
+2. Register a python fake class using ``@torch._library.register_fake_class``
+
+ a. Implement “fake methods” for each of the class’s C++ methods, which should
+ have the same schema as the C++ implementation.
+
+ b. Additionally, implement an ``__obj_unflatten__`` classmethod in the Python
+ fake class to tell us how to create a fake class from the flattened
+ states returned by ``__obj_flatten__``.
+
+Here is a breakdown of the diff. Following the guide in
+:doc:`Extending TorchScript with Custom C++ Classes `,
+we can create a thread-safe tensor queue and build it.
+
+.. code-block:: cpp
+
+ // Thread-safe Tensor Queue
+
+ #include <torch/custom_class.h>
+ #include <torch/script.h>
+
+ #include <deque>
+ #include <mutex>
+ #include <vector>
+
+ using namespace torch::jit;
+
+ // Thread-safe Tensor Queue
+ struct TensorQueue : torch::CustomClassHolder {
+ explicit TensorQueue(at::Tensor t) : init_tensor_(t) {}
+
+ explicit TensorQueue(c10::Dict<std::string, at::Tensor> dict) {
+ init_tensor_ = dict.at(std::string("init_tensor"));
+ const std::string key = "queue";
+ at::Tensor size_tensor;
+ size_tensor = dict.at(std::string(key + "/size")).cpu();
+ const auto* size_tensor_acc = size_tensor.const_data_ptr<int64_t>();
+ int64_t queue_size = size_tensor_acc[0];
+
+ for (const auto index : c10::irange(queue_size)) {
+ at::Tensor val = dict.at(key + "/" + std::to_string(index));
+ queue_.push_back(val);
+ }
+ }
+
+ // Push the element to the rear of queue.
+ // Lock is added for thread safety.
+ void push(at::Tensor x) {
+ std::lock_guard<std::mutex> guard(mutex_);
+ queue_.push_back(x);
+ }
+ // Pop the front element of queue and return it.
+ // If empty, return init_tensor_.
+ // Lock is added for thread safety.
+ at::Tensor pop() {
+ std::lock_guard<std::mutex> guard(mutex_);
+ if (!queue_.empty()) {
+ auto val = queue_.front();
+ queue_.pop_front();
+ return val;
+ } else {
+ return init_tensor_;
+ }
+ }
+
+ std::vector<at::Tensor> get_raw_queue() {
+ std::vector<at::Tensor> raw_queue(queue_.begin(), queue_.end());
+ return raw_queue;
+ }
+
+ private:
+ std::deque<at::Tensor> queue_;
+ std::mutex mutex_;
+ at::Tensor init_tensor_;
+ };
+
+ // The torch binding code
+ TORCH_LIBRARY(MyCustomClass, m) {
+ m.class_("TensorQueue")
+ .def(torch::init())
+ .def("push", &TensorQueue::push)
+ .def("pop", &TensorQueue::pop)
+ .def("get_raw_queue", &TensorQueue::get_raw_queue);
+ }
+
+**Step 1**: Add an ``__obj_flatten__`` method to the C++ custom class implementation:
+
+.. code-block:: cpp
+
+ // Thread-safe Tensor Queue
+ struct TensorQueue : torch::CustomClassHolder {
+ ...
+ std::tuple<std::tuple<std::string, std::vector<at::Tensor>>, std::tuple<std::string, at::Tensor>> __obj_flatten__() {
+ return std::tuple(std::tuple("queue", this->get_raw_queue()), std::tuple("init_tensor_", this->init_tensor_.clone()));
+ }
+ ...
+ };
+
+ TORCH_LIBRARY(MyCustomClass, m) {
+ m.class_("TensorQueue")
+ .def(torch::init())
+ ...
+ .def("__obj_flatten__", &TensorQueue::__obj_flatten__);
+ }
+
+**Step 2a**: Register a fake class in Python that implements each method.
+
+.. code-block:: python
+
+ # namespace::class_name
+ @torch._library.register_fake_class("MyCustomClass::TensorQueue")
+ class FakeTensorQueue:
+ def __init__(
+ self,
+ queue: List[torch.Tensor],
+ init_tensor_: torch.Tensor
+ ) -> None:
+ self.queue = queue
+ self.init_tensor_ = init_tensor_
+
+ def push(self, tensor: torch.Tensor) -> None:
+ self.queue.append(tensor)
+
+ def pop(self) -> torch.Tensor:
+ if len(self.queue) > 0:
+ return self.queue.pop(0)
+ return self.init_tensor_
+
+**Step 2b**: Implement an ``__obj_unflatten__`` classmethod in Python.
+
+.. code-block:: python
+
+ # namespace::class_name
+ @torch._library.register_fake_class("MyCustomClass::TensorQueue")
+ class FakeTensorQueue:
+ ...
+ @classmethod
+ def __obj_unflatten__(cls, flattened_tq):
+ return cls(**dict(flattened_tq))
+
+
+That’s it! Now we can create a module that uses this object and run it with ``torch.compile`` or ``torch.export``.
+
+.. code-block:: python
+
+ import torch
+
+ torch.classes.load_library("build/libcustom_class.so")
+ tq = torch.classes.MyCustomClass.TensorQueue(torch.empty(0).fill_(-1))
+
+ class Mod(torch.nn.Module):
+ def forward(self, tq, x):
+ tq.push(x.sin())
+ tq.push(x.cos())
+ poped_t = tq.pop()
+ assert torch.allclose(poped_t, x.sin())
+ return tq, poped_t
+
+ tq, poped_t = torch.compile(Mod(), backend="eager", fullgraph=True)(tq, torch.randn(2, 3))
+ assert tq.size() == 1
+
+ exported_program = torch.export.export(Mod(), (tq, torch.randn(2, 3),), strict=False)
+ exported_program.module()(tq, torch.randn(2, 3))
+
+We can also implement custom ops that take custom classes as inputs. For
+example, we could register a custom op ``for_each_add_(tq, tensor)``
+
+.. code-block:: cpp
+
+ struct TensorQueue : torch::CustomClassHolder {
+ ...
+ void for_each_add_(at::Tensor inc) {
+ for (auto& t : queue_) {
+ t.add_(inc);
+ }
+ }
+ ...
+ }
+
+
+ TORCH_LIBRARY_FRAGMENT(MyCustomClass, m) {
+ m.class_("TensorQueue")
+ ...
+ .def("for_each_add_", &TensorQueue::for_each_add_);
+
+ m.def(
+ "for_each_add_(__torch__.torch.classes.MyCustomClass.TensorQueue foo, Tensor inc) -> ()");
+ }
+
+ void for_each_add_(c10::intrusive_ptr<TensorQueue> tq, at::Tensor inc) {
+ tq->for_each_add_(inc);
+ }
+
+ TORCH_LIBRARY_IMPL(MyCustomClass, CPU, m) {
+ m.impl("for_each_add_", for_each_add_);
+ }
+
+
+Since the fake class is implemented in Python, the fake implementation of the
+custom op must also be registered in Python:
+
+.. code-block:: python
+
+ @torch.library.register_fake("MyCustomClass::for_each_add_")
+ def fake_for_each_add_(tq, inc):
+ tq.for_each_add_(inc)
+
+After re-compilation, we can export the custom op with:
+
+.. code-block:: python
+
+ class ForEachAdd(torch.nn.Module):
+ def forward(self, tq: torch.ScriptObject, a: torch.Tensor) -> torch.ScriptObject:
+ torch.ops.MyCustomClass.for_each_add_(tq, a)
+ return tq
+
+ mod = ForEachAdd()
+ tq = empty_tensor_queue()
+ qlen = 10
+ for i in range(qlen):
+ tq.push(torch.zeros(1))
+
+ ep = torch.export.export(mod, (tq, torch.ones(1)), strict=False)
+
+Why do we need to make a Fake Class?
+------------------------------------
+
+Tracing with a real custom object has several major downsides:
+
+1. Operators on real objects can be time consuming, e.g., the custom object
+ might be reading from the network or loading data from disk.
+
+2. We don’t want to mutate the real custom object or create side-effects to the environment while tracing.
+
+3. It cannot support dynamic shapes.
+
+However, it may be difficult for users to write a fake class, e.g. if the
+original class uses some third-party library that determines the output shape of
+the methods, or is complicated and written by others. In such cases, users can
+disable the fakification requirement by defining a ``tracing_mode`` method to
+return ``"real"``:
+
+.. code-block:: cpp
+
+ std::string tracing_mode() {
+ return "real";
+ }
+
+
+A caveat of fakification concerns **tensor aliasing.** We assume that no
+tensor within a torchbind object aliases a tensor outside of the torchbind
+object. Therefore, mutating one of these tensors will result in undefined
+behavior.
diff --git a/advanced_source/custom_classes.rst b/advanced_source/custom_classes.rst
new file mode 100644
index 00000000000..014bac2eebf
--- /dev/null
+++ b/advanced_source/custom_classes.rst
@@ -0,0 +1,231 @@
+Extending PyTorch with Custom C++ Classes
+===============================================
+
+
+This tutorial introduces an API for binding C++ classes into PyTorch.
+The API is very similar to
+`pybind11 `_, and most of the concepts will transfer
+over if you're familiar with that system.
+
+Implementing and Binding the Class in C++
+-----------------------------------------
+
+For this tutorial, we are going to define a simple C++ class that maintains persistent
+state in a member variable.
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/class.cpp
+ :language: cpp
+ :start-after: BEGIN class
+ :end-before: END class
+
+There are several things to note:
+
+- ``torch/custom_class.h`` is the header you need to include to extend PyTorch
+ with your custom class.
+- Notice that whenever we are working with instances of the custom
+ class, we do it via instances of ``c10::intrusive_ptr<>``. Think of ``intrusive_ptr``
+ as a smart pointer like ``std::shared_ptr``, but the reference count is stored
+ directly in the object, as opposed to a separate metadata block (as is done in
+ ``std::shared_ptr``). ``torch::Tensor`` internally uses the same pointer type,
+ and custom classes have to also use this pointer type so that we can
+ consistently manage different object types. A short sketch of creating an
+ ``intrusive_ptr`` follows this list.
+- The second thing to notice is that the user-defined class must inherit from
+ ``torch::CustomClassHolder``. This ensures that the custom class has space to
+ store the reference count.
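+
+For example, creating an instance on the C++ side looks like this. This is a
+small sketch and not part of the project files built below:
+
+.. code-block:: cpp
+
+   // Analogous to std::make_shared: allocates the object and hands back an
+   // intrusive_ptr whose reference count lives inside the object itself.
+   c10::intrusive_ptr<MyStackClass<std::string>> stack =
+       c10::make_intrusive<MyStackClass<std::string>>(
+           std::vector<std::string>{"foo", "bar"});
+   stack->push("baz");  // methods are invoked through the pointer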
+
+Now let's take a look at how we will make this class visible to PyTorch, a process called
+*binding* the class:
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/class.cpp
+ :language: cpp
+ :start-after: BEGIN binding
+ :end-before: END binding
+ :append:
+ ;
+ }
+
+
+
+Building the Example as a C++ Project With CMake
+------------------------------------------------
+
+Now, we're going to build the above C++ code with the `CMake
+`_ build system. First, take all the C++ code
+we've covered so far and place it in a file called ``class.cpp``.
+Then, write a simple ``CMakeLists.txt`` file and place it in the
+same directory. Here is what ``CMakeLists.txt`` should look like:
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/CMakeLists.txt
+ :language: cmake
+
+Also, create a ``build`` directory. Your file tree should look like this::
+
+ custom_class_project/
+ class.cpp
+ CMakeLists.txt
+ build/
+
+Go ahead and invoke cmake and then make to build the project:
+
+.. code-block:: shell
+
+ $ cd build
+ $ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..
+ -- The C compiler identification is GNU 7.3.1
+ -- The CXX compiler identification is GNU 7.3.1
+ -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc
+ -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc -- works
+ -- Detecting C compiler ABI info
+ -- Detecting C compiler ABI info - done
+ -- Detecting C compile features
+ -- Detecting C compile features - done
+ -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++
+ -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ -- works
+ -- Detecting CXX compiler ABI info
+ -- Detecting CXX compiler ABI info - done
+ -- Detecting CXX compile features
+ -- Detecting CXX compile features - done
+ -- Looking for pthread.h
+ -- Looking for pthread.h - found
+ -- Looking for pthread_create
+ -- Looking for pthread_create - not found
+ -- Looking for pthread_create in pthreads
+ -- Looking for pthread_create in pthreads - not found
+ -- Looking for pthread_create in pthread
+ -- Looking for pthread_create in pthread - found
+ -- Found Threads: TRUE
+ -- Found torch: /torchbind_tutorial/libtorch/lib/libtorch.so
+ -- Configuring done
+ -- Generating done
+ -- Build files have been written to: /torchbind_tutorial/build
+ $ make -j
+ Scanning dependencies of target custom_class
+ [ 50%] Building CXX object CMakeFiles/custom_class.dir/class.cpp.o
+ [100%] Linking CXX shared library libcustom_class.so
+ [100%] Built target custom_class
+
+What you'll find is there is now (among other things) a dynamic library
+file present in the build directory. On Linux, this is probably named
+``libcustom_class.so``. So the file tree should look like::
+
+ custom_class_project/
+ class.cpp
+ CMakeLists.txt
+ build/
+ libcustom_class.so
+
+Using the C++ Class from Python
+-----------------------------------------------
+
+Now that we have our class and its registration compiled into an ``.so`` file,
+we can load that `.so` into Python and try it out. Here's a script that
+demonstrates that:
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/custom_test.py
+ :language: python
+
+
+Defining Serialization/Deserialization Methods for Custom C++ Classes
+---------------------------------------------------------------------
+
+If you try to save a ``ScriptModule`` with a custom-bound C++ class as
+an attribute, you'll get the following error:
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/export_attr.py
+ :language: python
+
+.. code-block:: shell
+
+ $ python export_attr.py
+ RuntimeError: Cannot serialize custom bound C++ class __torch__.torch.classes.my_classes.MyStackClass. Please define serialization methods via def_pickle for this class. (pushIValueImpl at ../torch/csrc/jit/pickler.cpp:128)
+
+This is because PyTorch cannot automatically figure out what information
+to save from your C++ class. You must specify that manually. The way to do that
+is to define ``__getstate__`` and ``__setstate__`` methods on the class using
+the special ``def_pickle`` method on ``class_``.
+
+.. note::
+ The semantics of ``__getstate__`` and ``__setstate__`` are
+ equivalent to that of the Python pickle module. You can
+ `read more `_
+ about how we use these methods.
+
+Here is an example of the ``def_pickle`` call we can add to the registration of
+``MyStackClass`` to include serialization methods:
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/class.cpp
+ :language: cpp
+ :start-after: BEGIN def_pickle
+ :end-before: END def_pickle
+
+.. note::
+ We take a different approach from pybind11 in the pickle API. Whereas pybind11
+ has a special function ``pybind11::pickle()`` which you pass into ``class_::def()``,
+ we have a separate method ``def_pickle`` for this purpose. This is because the
+ name ``torch::jit::pickle`` was already taken, and we didn't want to cause confusion.
+
+Once we have defined the (de)serialization behavior in this way, our script can
+now run successfully:
+
+.. code-block:: shell
+
+ $ python ../export_attr.py
+ testing
+
+Defining Custom Operators that Take or Return Bound C++ Classes
+---------------------------------------------------------------
+
+Once you've defined a custom C++ class, you can also use that class
+as an argument or return from a custom operator (i.e. free functions). Suppose
+you have the following free function:
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/class.cpp
+ :language: cpp
+ :start-after: BEGIN free_function
+ :end-before: END free_function
+
+You can register it running the following code inside your ``TORCH_LIBRARY``
+block:
+
+.. literalinclude:: ../advanced_source/custom_classes/custom_class_project/class.cpp
+ :language: cpp
+ :start-after: BEGIN def_free
+ :end-before: END def_free
+
+Once this is done, you can use the op like the following example:
+
+.. code-block:: python
+
+ class TryCustomOp(torch.nn.Module):
+ def __init__(self):
+ super(TryCustomOp, self).__init__()
+ self.f = torch.classes.my_classes.MyStackClass(["foo", "bar"])
+
+ def forward(self):
+ return torch.ops.my_classes.manipulate_instance(self.f)
+
+.. note::
+
+ Registration of an operator that takes a C++ class as an argument requires that
+ the custom class has already been registered. You can enforce this by
+ making sure the custom class registration and your free function definitions
+ are in the same ``TORCH_LIBRARY`` block, and that the custom class
+ registration comes first. In the future, we may relax this requirement,
+ so that these can be registered in any order.
+
+
+Conclusion
+----------
+
+This tutorial walked you through how to expose a C++ class to PyTorch, how to
+register its methods, how to use that class from Python, and how to save and
+load code using the class and run that code in a standalone C++ process. You
+are now ready to extend your PyTorch models with C++ classes that interface
+with third party C++ libraries or implement any other use case that requires
+the lines between Python and C++ to blend smoothly.
+
+As always, if you run into any problems or have questions, you can use our
+`forum `_ or `GitHub issues
+`_ to get in touch. Also, our
+`frequently asked questions (FAQ) page
+`_ may have helpful information.
diff --git a/advanced_source/custom_classes/CMakeLists.txt b/advanced_source/custom_classes/CMakeLists.txt
new file mode 100644
index 00000000000..6a1eb3e87fa
--- /dev/null
+++ b/advanced_source/custom_classes/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
+project(infer)
+
+find_package(Torch REQUIRED)
+
+add_subdirectory(custom_class_project)
+
+# Define our library target
+add_executable(infer infer.cpp)
+set(CMAKE_CXX_STANDARD 14)
+# Link against LibTorch
+target_link_libraries(infer "${TORCH_LIBRARIES}")
+# This is where we link in our libcustom_class code, making our
+# custom class available in our binary.
+target_link_libraries(infer -Wl,--no-as-needed custom_class)
diff --git a/advanced_source/custom_classes/custom_class_project/CMakeLists.txt b/advanced_source/custom_classes/custom_class_project/CMakeLists.txt
new file mode 100644
index 00000000000..bb3d41aa997
--- /dev/null
+++ b/advanced_source/custom_classes/custom_class_project/CMakeLists.txt
@@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
+project(custom_class)
+
+find_package(Torch REQUIRED)
+
+# Define our library target
+add_library(custom_class SHARED class.cpp)
+set(CMAKE_CXX_STANDARD 14)
+# Link against LibTorch
+target_link_libraries(custom_class "${TORCH_LIBRARIES}")
diff --git a/advanced_source/custom_classes/custom_class_project/class.cpp b/advanced_source/custom_classes/custom_class_project/class.cpp
new file mode 100644
index 00000000000..dc89a3ecb2e
--- /dev/null
+++ b/advanced_source/custom_classes/custom_class_project/class.cpp
@@ -0,0 +1,132 @@
+// BEGIN class
+// This header is all you need to do the C++ portions of this
+// tutorial
+#include <torch/script.h>
+// This header is what defines the custom class registration
+// behavior specifically. script.h already includes this, but
+// we include it here so you know it exists in case you want
+// to look at the API or implementation.
+#include <torch/custom_class.h>
+
+#include <string>
+#include <vector>
+
+template <class T>
+struct MyStackClass : torch::CustomClassHolder {
+ std::vector<T> stack_;
+ MyStackClass(std::vector<T> init) : stack_(init.begin(), init.end()) {}
+
+ void push(T x) {
+ stack_.push_back(x);
+ }
+ T pop() {
+ auto val = stack_.back();
+ stack_.pop_back();
+ return val;
+ }
+
+ c10::intrusive_ptr<MyStackClass> clone() const {
+ return c10::make_intrusive<MyStackClass>(stack_);
+ }
+
+ void merge(const c10::intrusive_ptr<MyStackClass>& c) {
+ for (auto& elem : c->stack_) {
+ push(elem);
+ }
+ }
+};
+// END class
+
+// BEGIN free_function
+c10::intrusive_ptr<MyStackClass<std::string>> manipulate_instance(const c10::intrusive_ptr<MyStackClass<std::string>>& instance) {
+ instance->pop();
+ return instance;
+}
+// END free_function
+
+// BEGIN binding
+// Notice a few things:
+// - We pass the class to be registered as a template parameter to
+// `torch::class_`. In this instance, we've passed the
+// specialization of the MyStackClass class ``MyStackClass<std::string>``.
+// In general, you cannot register a non-specialized template
+// class. For non-templated classes, you can just pass the
+// class name directly as the template parameter.
+// - The arguments passed to the constructor make up the "qualified name"
+// of the class. In this case, the registered class will appear in
+// Python and C++ as `torch.classes.my_classes.MyStackClass`. We call
+// the first argument the "namespace" and the second argument the
+// actual class name.
+TORCH_LIBRARY(my_classes, m) {
+ m.class_>("MyStackClass")
+ // The following line registers the contructor of our MyStackClass
+ // class that takes a single `std::vector` argument,
+ // i.e. it exposes the C++ method `MyStackClass(std::vector init)`.
+ // Currently, we do not support registering overloaded
+ // constructors, so for now you can only `def()` one instance of
+ // `torch::init`.
+ .def(torch::init<std::vector<std::string>>())
+ // The next line registers a stateless (i.e. no captures) C++ lambda
+ // function as a method. Note that a lambda function must take a
+ // `c10::intrusive_ptr<YourClass>` (or some const/ref version of that)
+ // as the first argument. Other arguments can be whatever you want.
+ .def("top", [](const c10::intrusive_ptr>& self) {
+ return self->stack_.back();
+ })
+ // The following four lines expose methods of the MyStackClass
+ // class as-is. `torch::class_` will automatically examine the
+ // argument and return types of the passed-in method pointers and
+ // expose these to Python and TorchScript accordingly. Finally, notice
+ // that we must take the *address* of the fully-qualified method name,
+ // i.e. use the unary `&` operator, due to C++ typing rules.
+ .def("push", &MyStackClass::push)
+ .def("pop", &MyStackClass::pop)
+ .def("clone", &MyStackClass::clone)
+ .def("merge", &MyStackClass::merge)
+// END binding
+#ifndef NO_PICKLE
+// BEGIN def_pickle
+ // class_<>::def_pickle allows you to define the serialization
+ // and deserialization methods for your C++ class.
+ // Currently, we only support passing stateless lambda functions
+ // as arguments to def_pickle
+ .def_pickle(
+ // __getstate__
+ // This function defines what data structure should be produced
+ // when we serialize an instance of this class. The function
+ // must take a single `self` argument, which is an intrusive_ptr
+ // to the instance of the object. The function can return
+ // any type that is supported as a return value of the TorchScript
+ // custom operator API. In this instance, we've chosen to return
+ // a std::vector<std::string> as the salient data to preserve
+ // from the class.
+ [](const c10::intrusive_ptr<MyStackClass<std::string>>& self)
+ -> std::vector<std::string> {
+ return self->stack_;
+ },
+ // __setstate__
+ // This function defines how to create a new instance of the C++
+ // class when we are deserializing. The function must take a
+ // single argument of the same type as the return value of
+ // `__getstate__`. The function must return an intrusive_ptr
+ // to a new instance of the C++ class, initialized however
+ // you would like given the serialized state.
+ [](std::vector<std::string> state)
+ -> c10::intrusive_ptr<MyStackClass<std::string>> {
+ // A convenient way to instantiate an object and get an
+ // intrusive_ptr to it is via `make_intrusive`. We use
+ // that here to allocate an instance of MyStackClass
+ // and call the single-argument std::vector<std::string>
+ // constructor with the serialized state.
+ return c10::make_intrusive<MyStackClass<std::string>>(std::move(state));
+ });
+// END def_pickle
+#endif // NO_PICKLE
+
+// BEGIN def_free
+ m.def(
+ "manipulate_instance(__torch__.torch.classes.my_classes.MyStackClass x) -> __torch__.torch.classes.my_classes.MyStackClass Y",
+ manipulate_instance
+ );
+// END def_free
+}
diff --git a/advanced_source/custom_classes/custom_class_project/custom_test.py b/advanced_source/custom_classes/custom_class_project/custom_test.py
new file mode 100644
index 00000000000..1deda445310
--- /dev/null
+++ b/advanced_source/custom_classes/custom_class_project/custom_test.py
@@ -0,0 +1,53 @@
+import torch
+
+# `torch.classes.load_library()` allows you to pass the path to your .so file
+# to load it in and make the custom C++ classes available to both Python and
+# TorchScript
+torch.classes.load_library("build/libcustom_class.so")
+# You can query the loaded libraries like this:
+print(torch.classes.loaded_libraries)
+# prints {'/custom_class_project/build/libcustom_class.so'}
+
+# We can find and instantiate our custom C++ class in python by using the
+# `torch.classes` namespace:
+#
+# This instantiation will invoke the MyStackClass(std::vector<std::string> init)
+# constructor we registered earlier
+s = torch.classes.my_classes.MyStackClass(["foo", "bar"])
+
+# We can call methods in Python
+s.push("pushed")
+assert s.pop() == "pushed"
+
+# Test custom operator
+s.push("pushed")
+torch.ops.my_classes.manipulate_instance(s) # acting as s.pop()
+assert s.top() == "bar"
+
+# Returning and passing instances of custom classes works as you'd expect
+s2 = s.clone()
+s.merge(s2)
+for expected in ["bar", "foo", "bar", "foo"]:
+ assert s.pop() == expected
+
+# We can also use the class in TorchScript
+# For now, we need to assign the class's type to a local in order to
+# annotate the type on the TorchScript function. This may change
+# in the future.
+MyStackClass = torch.classes.my_classes.MyStackClass
+
+
+@torch.jit.script
+def do_stacks(s: MyStackClass): # We can pass a custom class instance
+ # We can instantiate the class
+ s2 = torch.classes.my_classes.MyStackClass(["hi", "mom"])
+ s2.merge(s) # We can call a method on the class
+ # We can also return instances of the class
+ # from TorchScript function/methods
+ return s2.clone(), s2.top()
+
+
+stack, top = do_stacks(torch.classes.my_classes.MyStackClass(["wow"]))
+assert top == "wow"
+for expected in ["wow", "mom", "hi"]:
+ assert stack.pop() == expected
diff --git a/advanced_source/custom_classes/custom_class_project/export_attr.py b/advanced_source/custom_classes/custom_class_project/export_attr.py
new file mode 100644
index 00000000000..9999d5c8183
--- /dev/null
+++ b/advanced_source/custom_classes/custom_class_project/export_attr.py
@@ -0,0 +1,21 @@
+# export_attr.py
+import torch
+
+torch.classes.load_library('build/libcustom_class.so')
+
+
+class Foo(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.stack = torch.classes.my_classes.MyStackClass(["just", "testing"])
+
+ def forward(self, s: str) -> str:
+ return self.stack.pop() + s
+
+
+scripted_foo = torch.jit.script(Foo())
+
+scripted_foo.save('foo.pt')
+loaded = torch.jit.load('foo.pt')
+
+print(loaded.stack.pop())
diff --git a/advanced_source/custom_classes/custom_class_project/save.py b/advanced_source/custom_classes/custom_class_project/save.py
new file mode 100644
index 00000000000..8826f95da7c
--- /dev/null
+++ b/advanced_source/custom_classes/custom_class_project/save.py
@@ -0,0 +1,18 @@
+import torch
+
+torch.classes.load_library('build/libcustom_class.so')
+
+
+class Foo(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, s: str) -> str:
+ stack = torch.classes.my_classes.MyStackClass(["hi", "mom"])
+ return stack.pop() + s
+
+
+scripted_foo = torch.jit.script(Foo())
+print(scripted_foo.graph)
+
+scripted_foo.save('foo.pt')
diff --git a/advanced_source/custom_classes/infer.cpp b/advanced_source/custom_classes/infer.cpp
new file mode 100644
index 00000000000..1ca5b002383
--- /dev/null
+++ b/advanced_source/custom_classes/infer.cpp
@@ -0,0 +1,20 @@
+#include <torch/script.h>
+
+#include <iostream>
+#include <memory>
+
+int main(int argc, const char* argv[]) {
+ torch::jit::Module module;
+ try {
+ // Deserialize the ScriptModule from a file using torch::jit::load().
+ module = torch::jit::load("foo.pt");
+ }
+ catch (const c10::Error& e) {
+ std::cerr << "error loading the model\n";
+ return -1;
+ }
+
+ std::vector<c10::IValue> inputs = {"foobarbaz"};
+ auto output = module.forward(inputs).toString();
+ std::cout << output->string() << std::endl;
+}
diff --git a/advanced_source/custom_classes/run.sh b/advanced_source/custom_classes/run.sh
new file mode 100755
index 00000000000..52c59581309
--- /dev/null
+++ b/advanced_source/custom_classes/run.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -ex
+
+rm -rf build
+rm -rf custom_class_project/build
+
+pushd custom_class_project
+ mkdir build
+ (cd build && cmake CXXFLAGS="-DNO_PICKLE" -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..)
+ (cd build && make)
+ python custom_test.py
+ python save.py
+ ! python export_attr.py
+popd
+
+mkdir build
+(cd build && cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..)
+(cd build && make)
+mv custom_class_project/foo.pt build/foo.pt
+(cd build && ./infer)
diff --git a/advanced_source/custom_classes/run2.sh b/advanced_source/custom_classes/run2.sh
new file mode 100755
index 00000000000..d4ef0101a83
--- /dev/null
+++ b/advanced_source/custom_classes/run2.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -ex
+
+rm -rf build
+rm -rf custom_class_project/build
+
+pushd custom_class_project
+ mkdir build
+ (cd build && cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..)
+ (cd build && make)
+ python export_attr.py
+popd
diff --git a/advanced_source/custom_ops_landing_page.rst b/advanced_source/custom_ops_landing_page.rst
new file mode 100644
index 00000000000..f05eee43060
--- /dev/null
+++ b/advanced_source/custom_ops_landing_page.rst
@@ -0,0 +1,66 @@
+.. _custom-ops-landing-page:
+
+PyTorch Custom Operators
+===========================
+
+PyTorch offers a large library of operators that work on Tensors (e.g. ``torch.add``,
+``torch.sum``, etc). However, you may wish to bring a new custom operation to PyTorch
+and get it to work with subsystems like ``torch.compile``, autograd, and ``torch.vmap``.
+In order to do so, you must register the custom operation with PyTorch via the Python
+`torch.library docs `_ or C++ ``TORCH_LIBRARY``
+APIs.
+
+
+
+Authoring a custom operator from Python
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Please see :ref:`python-custom-ops-tutorial`.
+
+You may wish to author a custom operator from Python (as opposed to C++) if:
+
+- you have a Python function you want PyTorch to treat as an opaque callable, especially with
+ respect to ``torch.compile`` and ``torch.export``.
+- you have some Python bindings to C++/CUDA kernels and want those to compose with PyTorch
+ subsystems (like ``torch.compile`` or ``torch.autograd``)
+- you are using Python (and not a C++-only environment like AOTInductor).
+
+Integrating custom C++ and/or CUDA code with PyTorch
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Please see :ref:`cpp-custom-ops-tutorial`.
+
+.. note::
+
+ ``SYCL`` serves as the backend programming language for Intel GPUs. To integrate custom SYCL code, refer to :ref:`cpp-custom-ops-tutorial-sycl`.
+
+You may wish to author a custom operator from C++ (as opposed to Python) if:
+
+- you have custom C++ and/or CUDA code.
+- you plan to use this code with ``AOTInductor`` to do Python-less inference.
+
+The Custom Operators Manual
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For information not covered in the tutorials and this page, please see
+`The Custom Operators Manual `_
+(we're working on moving the information to our docs site). We recommend that you
+first read one of the tutorials above and then use the Custom Operators Manual as a reference;
+it is not meant to be read head to toe.
+
+When should I create a Custom Operator?
+---------------------------------------
+If your operation is expressible as a composition of built-in PyTorch operators
+then please write it as a Python function and call it instead of creating a
+custom operator. Use the operator registration APIs to create a custom operator if you
+are calling into some library that PyTorch doesn't understand (e.g. custom C/C++ code,
+a custom CUDA kernel, or Python bindings to C/C++/CUDA extensions).
+
+Why should I create a Custom Operator?
+--------------------------------------
+
+It is possible to use a C/C++/CUDA kernel by grabbing a Tensor's data pointer
+and passing it to a pybind'ed kernel. However, this approach doesn't compose with
+PyTorch subsystems like autograd, torch.compile, vmap, and more. In order
+for an operation to compose with PyTorch subsystems, it must be registered
+via the operator registration APIs.
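+
+As a very rough sketch (the ``myops`` namespace, the ``mymuladd`` operator, and the
+CPU-only kernel below are hypothetical; the tutorials linked above cover the real
+details), registration through the C++ ``TORCH_LIBRARY`` APIs looks roughly like this:
+
+.. code-block:: cpp
+
+   #include <torch/library.h>
+   #include <ATen/ATen.h>
+
+   // A plain C++ kernel; in practice this would call into your C/C++/CUDA code.
+   at::Tensor mymuladd_cpu(const at::Tensor& a, const at::Tensor& b, double c) {
+     return a * b + c;
+   }
+
+   // Declare the operator schema once...
+   TORCH_LIBRARY(myops, m) {
+     m.def("mymuladd(Tensor a, Tensor b, float c) -> Tensor");
+   }
+
+   // ...and register a kernel for the CPU dispatch key.
+   TORCH_LIBRARY_IMPL(myops, CPU, m) {
+     m.impl("mymuladd", mymuladd_cpu);
+   }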
diff --git a/advanced_source/ddp_pipeline.rst b/advanced_source/ddp_pipeline.rst
new file mode 100644
index 00000000000..bf9e4d28f33
--- /dev/null
+++ b/advanced_source/ddp_pipeline.rst
@@ -0,0 +1,10 @@
+Training Transformer models using Distributed Data Parallel and Pipeline Parallelism
+====================================================================================
+
+This tutorial has been deprecated.
+
+Redirecting to the latest parallelism APIs in 3 seconds...
+
+.. raw:: html
+
+
diff --git a/advanced_source/dispatcher.rst b/advanced_source/dispatcher.rst
new file mode 100644
index 00000000000..4b03803c15b
--- /dev/null
+++ b/advanced_source/dispatcher.rst
@@ -0,0 +1,410 @@
+Registering a Dispatched Operator in C++
+========================================
+
+.. warning::
+
+ This tutorial is deprecated as of PyTorch 2.4. Please see :ref:`custom-ops-landing-page`
+ for the newest up-to-date guides on extending PyTorch with Custom Operators.
+
+The dispatcher is an internal component of PyTorch which is responsible for
+figuring out what code should actually get run when you call a function like
+``torch::add``. This can be nontrivial, because PyTorch operations need
+to handle a lot of cross-cutting concerns that are "layered" on top of one
+another. Here is a sampling of some of the things it handles:
+
+* Switching between the CPU and CUDA implementations of an operator, depending
+ on the devices of the input tensors.
+* Switching between the autograd and backend implementations of an operator,
+ depending on whether or not autograd handling is necessary.
+* Applying autocasting when necessary for automatic mixed precision.
+* Applying batching rules when an operator is run under a ``vmap`` call.
+* Tracing execution of operations, if you are tracing a model for export.
+
+If in your `custom operator code `_ you find yourself
+manually writing if statements to handle these cases, the dispatcher APIs can
+help organize your code. (Conversely, if your custom operator is very simple
+and is only for CPU inference, you probably don't need to use the dispatcher,
+just use the basic API.)
+
+In this tutorial, we will describe how to structure a custom operator
+registration to use the dispatcher to organize various components. We'll
+assume that you are familiar with how to
+`register an operator `_ and how to write
+a `custom autograd function `_.
+
+Defining schema and backend implementations
+-------------------------------------------
+
+The general principle behind the dispatcher is that it divides the
+implementation of an operator into multiple kernels, each of which implements
+functionality for a specific *dispatch key*, e.g. CPU, CUDA. The dispatcher
+determines what the highest priority dispatch key is at the time
+you call an operator (this is done by looking at both the tensor arguments as
+well as some thread local state), and transfers control to the kernel for that
+dispatch key. The end effect is that when you call an operator, we first
+execute the Autograd kernel, and then we redispatch to the backend kernel
+depending on the device types of the passed in tensors.
+
+Let's take a look at the various parts involved in making this
+happen. First, we must define the schema for the operator in question.
+Unlike simple pybind11-style operator registration, we don't actually
+provide an implementation of our operator at this point; we just
+provide a schema string specifying the type signature of the operator
+that all of our other kernels will abide by:
+
+.. literalinclude:: ../advanced_source/dispatcher/op.cpp
+ :language: cpp
+ :start-after: BEGIN TORCH_LIBRARY
+ :end-before: END TORCH_LIBRARY
+
+Next, we need to actually provide some implementations of this operator.
+For concreteness, here is a really simple implementation of addition on CPU:
+
+.. literalinclude:: ../advanced_source/dispatcher/op.cpp
+ :language: cpp
+ :start-after: BEGIN myadd_cpu
+ :end-before: END myadd_cpu
+
+We'd like to register this function as an implementation of ``myops::myadd``.
+However, the simple way of registering it (``def("myadd", myadd_cpu)``) would
+register the kernel to run in all cases, even if the tensor is not a CPU
+tensor! (Internally, we refer to these as "catch-all" kernels, since they
+catch all cases.) To ensure that ``myadd_cpu`` is only run for
+CPU tensors, we can use the ``TORCH_LIBRARY_IMPL`` macro:
+
+.. literalinclude:: ../advanced_source/dispatcher/op.cpp
+ :language: cpp
+ :start-after: BEGIN TORCH_LIBRARY_IMPL CPU
+ :end-before: END TORCH_LIBRARY_IMPL CPU
+
+The ``TORCH_LIBRARY_IMPL`` lets us register implementations for operators on
+a specific dispatch key (in this case, CPU). Each call to ``impl``
+associates a CPU kernel with the corresponding operator (which we previously
+defined in the ``TORCH_LIBRARY`` block). If we also have a CUDA implementation ``myadd_cuda``,
+we can register it in a separate ``TORCH_LIBRARY_IMPL`` block:
+
+.. literalinclude:: ../advanced_source/dispatcher/op.cpp
+ :language: cpp
+ :start-after: BEGIN TORCH_LIBRARY_IMPL CUDA
+ :end-before: END TORCH_LIBRARY_IMPL CUDA
+
+These registrations can be split across files or even across library boundaries; so,
+for example, you could have these two ``TORCH_LIBRARY_IMPL`` blocks compiled
+into separate ``myops_cpu`` and ``myops_cuda`` dynamic libraries. Generally
+speaking, the structure of your registrations will look like this:
+
+1. A single ``TORCH_LIBRARY`` that lists every custom operator in your namespace
+ in a centralized place.
+2. A ``TORCH_LIBRARY_IMPL`` per dispatch key that registers implementations for
+ that key (e.g., CPU or CUDA). If you like, you can further subdivide
+ ``TORCH_LIBRARY_IMPL`` blocks into a block per operator. This is convenient
+ if you have a separate file per operator implementation, but don't want to
+ expose the operators in a header; you can just put the registration in the
+ cpp file that defines your operator.
+
+.. note::
+
+ Did you know that you can also write ``TORCH_LIBRARY_IMPL`` blocks for existing
+ core operators in PyTorch? This is how XLA support for PyTorch is
+ implemented: the ``torch_xla`` library contains a ``TORCH_LIBRARY_IMPL``
+ that provides implementations for all basic operators on the XLA dispatch
+ key.
+
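+For instance, a minimal sketch of such a registration for one existing core
+operator might look like the following (the kernel body is only a placeholder;
+real ``torch_xla`` kernels lower the operator onto the device):
+
+.. code-block:: cpp
+
+   #include <torch/library.h>
+
+   at::Tensor my_xla_abs(const at::Tensor& self) {
+     // Lower `aten::abs` onto the out-of-tree backend here.
+     TORCH_CHECK(false, "abs is not implemented in this sketch");
+     return self;  // unreachable; keeps the compiler happy
+   }
+
+   TORCH_LIBRARY_IMPL(aten, XLA, m) {
+     m.impl("abs", my_xla_abs);
+   }
+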
+
+For operators that do not need autograd
+---------------------------------------
+
+Note: This section only applies to versions of PyTorch ``>= 1.10``.
+
+In the next section, we will discuss how to add autograd support to an operator.
+But for the ops that do not need autograd support, the following kernel should be
+registered to improve usability and make your op behave like PyTorch's built-in
+operators.
+
+.. code-block:: cpp
+
+ TORCH_LIBRARY_IMPL(myops, Autograd, m) {
+ m.impl(op, autogradNotImplementedFallback());
+ }
+
+The above lines register an ``Autograd`` kernel that appends a dummy
+``NotImplemented`` node on forward (preserving the ``requires_grad``-ness of the inputs).
+On backward, the ``NotImplemented`` node raises an error. This can be helpful
+for debugging in larger models, where it can otherwise be hard to pinpoint
+exactly where the ``requires_grad``-ness is lost during the forward pass.
+
+In-place or view ops
+^^^^^^^^^^^^^^^^^^^^
+
+To ensure correctness and best possible performance, if your op mutates an input
+in-place or returns a tensor that aliases with one of the inputs, two additional
+steps should be taken:
+
+1. Register an ``ADInplaceOrView`` kernel in addition to the ``Autograd`` kernel
+ above. This kernel handles the necessary bookkeeping to ensure the correctness
+ of in-place or view operations. It is important to note that this ADInplaceOrView
+ kernel should only be used with ``autogradNotImplementedFallback``.
+
+.. code-block:: cpp
+
+ TORCH_LIBRARY_IMPL(myops, Autograd, m) {
+ m.impl(op, autogradNotImplementedFallback());
+ }
+ TORCH_LIBRARY_IMPL(myops, ADInplaceOrView, m) {
+ m.impl(op, autogradNotImplementedInplaceOrViewFallback());
+ }
+
+2. The ``Autograd`` or ``ADInplaceOrView`` boxed kernels registered above
+ rely on operator schema information in their logic. If your op mutates an input
+ in-place or returns a tensor that aliases with one of the inputs, it is important to
+ ensure that your schema properly reflects this. See
+ `here `_
+ for more information on how to annotate the schema.
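+
+A short, standalone sketch of such annotated schemas (the operator names here are
+made up for illustration): ``(a!)`` marks an argument that is mutated in place and
+aliased by the output, while ``(a)`` marks a non-mutating view relationship.
+
+.. code-block:: cpp
+
+   TORCH_LIBRARY(myops, m) {
+     // In-place variant: `self` is mutated and returned.
+     m.def("myadd_(Tensor(a!) self, Tensor other) -> Tensor(a!)");
+     // View-style variant: the output aliases `self` without mutating it.
+     m.def("my_slice(Tensor(a) self, int dim, int start, int end) -> Tensor(a)");
+   }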
+
+.. _autograd-support:
+
+Adding autograd support
+-----------------------
+
+At this point, we have an operator with both CPU and CUDA implementations. How
+can we add autograd support to it? As you might guess, we will register an
+autograd kernel (similar to what's described in the `custom autograd function `_ tutorial)!
+However, there is a twist: unlike the CPU and CUDA kernels, the autograd kernel
+needs to *redispatch*: it needs to call back into the dispatcher to get to
+the inference kernels, e.g. CPU or CUDA implementations.
+
+Thus, before we write the autograd kernel, let's write a *dispatching function*
+which calls into the dispatcher to find the right kernel for your operator.
+This function constitutes the public C++ API for your operators--in fact, all of
+the tensor functions in PyTorch's C++ API call the dispatcher in the same
+way under the hood. Here's what the dispatching function looks like:
+
+.. literalinclude:: ../advanced_source/dispatcher/op.cpp
+ :language: cpp
+ :start-after: BEGIN myadd
+ :end-before: END myadd
+
+Let's break it down:
+
+* In the first line, we look up a typed operator handle from the dispatcher
+ corresponding to the operator that we are going to dispatch to.
+ ``findSchemaOrThrow`` takes two arguments: the (namespace qualified) name
+ of the operator, and the overload name of the operator (typically just
+ the empty string). ``typed`` casts the dynamically typed handle into
+ a statically typed handle (doing a runtime test to make sure you've given
+ the correct C++ type), so that we can do a normal C++ call on it. We
+ pass it ``decltype(myadd)`` since the type of the dispatching function is
+ the same as the type of the underlying kernels registered to the dispatcher.
+
+ For performance, this computation is done in a static variable, so that
+ we only need to do the (slow) lookup once. If you typoed the name of the
+ operator you want to call, this lookup will error the first time you call this
+ function.
+
+* In the second line, we simply ``call`` the operator handle with all of the
+ arguments passed into the dispatching function. This will actually invoke
+ the dispatcher and in the end control will be transferred to whatever kernel
+ is appropriate for this call.
+
+With the dispatch function in hand, we can now write the autograd kernel:
+
+.. literalinclude:: ../advanced_source/dispatcher/op.cpp
+ :language: cpp
+ :start-after: BEGIN myadd_autograd
+ :end-before: END myadd_autograd
+
+The autograd function is written as normal using ``torch::autograd::Function``,
+except that instead of directly writing the implementation in ``forward()``,
+we:
+
+1. Turn off autograd handling with the ``at::AutoNonVariableTypeMode`` RAII
+ guard, and then
+2. Call the dispatch function ``myadd`` to call back into the dispatcher.
+
+Without (1), your calls will loop infinitely (and overflow the stack), because
+``myadd`` will send you back to this function (as the highest priority dispatch
+key would still be autograd.) With (1),
+autograd is excluded from the set of dispatch keys under consideration, and
+we will go to the next handlers, which will be either CPU or CUDA.
+
+We can now register this function in the same way we registered the CPU/CUDA
+functions:
+
+.. literalinclude:: ../advanced_source/dispatcher/op.cpp
+ :language: cpp
+ :start-after: BEGIN TORCH_LIBRARY_IMPL Autograd
+ :end-before: END TORCH_LIBRARY_IMPL Autograd
+
+
+.. note::
+
+ In this example we register the kernel to ``Autograd``, which installs it as the
+ autograd kernel for all backends. You can also register optimized kernels for specific
+ backends by using the corresponding backend-specific dispatch key - for example,
+ ``AutogradCPU`` or ``AutogradCUDA``. To explore these and other dispatch key
+ options in more detail, check out the ``PythonDispatcher`` tool provided in
+ `torch/_python_dispatcher.py `_.
+
+
+Going beyond autograd
+---------------------
+
+In some sense, the dispatcher isn't doing all that much: all it does is
+implement a glorified if-statement, along the lines of this:
+
+.. code-block:: cpp
+
+ class MyAddFunction : ... {
+ public:
+ static Tensor forward(
+ AutogradContext *ctx, torch::Tensor self, torch::Tensor other) {
+
+ if (self.device().type() == DeviceType::CPU) {
+ return add_cpu(self, other);
+ } else if (self.device().type() == DeviceType::CUDA) {
+ return add_cuda(self, other);
+ } else {
+ TORCH_CHECK(0, "Unsupported device ", self.device().type());
+ }
+ }
+ ...
+ }
+
+So why use the dispatcher? There are a few reasons:
+
+1. It is decentralized. You can assemble all of the pieces of an operator
+ (CPU, CUDA, Autograd) without having to write a single, centralized
+ if statement that refers to all of them. Importantly, third parties can
+ register extra implementations for other aspects without having to patch the
+ original definition of an operator. We'll talk more about extending the
+ dispatcher in `extending dispatcher for a new backend `_.
+
+2. It supports more dispatch keys than CPU, CUDA and Autograd. You can
+ see a full list of dispatch keys that are currently implemented
+ in PyTorch in ``c10/core/DispatchKey.h``. These dispatch keys
+ implement a variety of optional functionality for operators, and if you
+ decide you want your custom operator to support this functionality,
+ all you have to do is register a kernel for the appropriate key.
+
+3. The dispatcher implements support for boxed fallback functions, which
+ are functions that can be implemented once and apply to all operators
+ in the system. Boxed fallbacks can be used to provide default behavior
+ for a dispatch key; if you use the dispatcher to implement your operator,
+ you also opt into the fallbacks for all of these operations.
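+
+For reference, a rough sketch of what registering a boxed fallback looks like; the
+fallthrough below simply forwards every call to the next available kernel, and
+PyTorch itself registers fallbacks of this general shape for keys such as
+``Autocast``:
+
+.. code-block:: cpp
+
+   #include <torch/library.h>
+
+   // `_` means "all namespaces": this single registration applies to every
+   // operator dispatched on the given key.
+   TORCH_LIBRARY_IMPL(_, Autocast, m) {
+     m.fallback(torch::CppFunction::makeFallthrough());
+   }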
+
+Here are some particular dispatch keys which you may need to define an operator
+for.
+
+Autocast
+^^^^^^^^
+
+The Autocast dispatch key implements support for
+`automatic mixed precision (AMP) `_.
+An autocast wrapper kernel typically casts incoming ``float16`` or ``float32`` CUDA tensors
+to some preferred precision before running the op.
+For example, matmuls and convolutions on floating-point CUDA tensors usually run faster
+and use less memory in ``float16`` without impairing convergence.
+Autocast wrappers only have an effect in
+`autocast-enabled contexts `_.
+
+Here's an autocast wrapper for a hypothetical custom matmul, along with its registration:
+
+.. code-block:: cpp
+
+ // Autocast-specific helper functions
+ #include <ATen/autocast_mode.h>
+
+ Tensor mymatmul_autocast(const Tensor& self, const Tensor& other) {
+ c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
+ return mymatmul(at::autocast::cached_cast(at::kHalf, self),
+ at::autocast::cached_cast(at::kHalf, other));
+ }
+
+ TORCH_LIBRARY_IMPL(myops, Autocast, m) {
+ m.impl("mymatmul", mymatmul_autocast);
+ }
+
+``cached_cast(kHalf, tensor)`` casts ``tensor`` to ``float16`` if ``tensor`` is CUDA and ``float32``;
+otherwise, it leaves ``tensor`` unchanged (c.f. the
+`eligibility policy `_ for natively autocasted ops).
+This ensures if the network calls ``mymatmul`` on any mixture of ``float16`` and ``float32`` CUDA tensors,
+``mymatmul`` runs in ``float16``. Meanwhile, calls to ``mymatmul`` with non-CUDA, integer-type, or ``float64``
+inputs are unaffected. Using ``cached_cast`` to follow the native eligibility policy in your own autocast wrapper
+is recommended, but not required. For example, if you wanted to force ``float16`` execution for all input types,
+you could ``return mymatmul(self.half(), other.half());`` instead of using ``cached_cast``.
+
+Notice that, like our autograd kernels, we exclude the ``Autocast`` key from
+dispatch before redispatching.
+
+By default, if no autocast wrapper is provided,
+we fall through directly to the regular operator implementation (no
+autocasting occurs). (We didn't use ``myadd`` for this example, since pointwise
+addition doesn't need autocasting and should just fall through.)
+
+When should an autocast wrapper be registered? Unfortunately, there aren't
+cut-and-dried rules for an op's preferred precision. You can
+get a sense for some native ops' preferred precisions by looking at the
+`cast lists `_.
+General guidance:
+
+* Ops that do reductions should probably execute in ``float32``,
+* Any op that does a convolution or gemm under the hood should
+ probably execute in ``float16``, and
+* Other ops with multiple floating-point tensor inputs should standardize
+ them to a common precision (unless the implementation supports inputs with different precisions).
+
+If your custom op falls into the third category, the ``promote_type`` template
+helps figure out the widest floating-point type present among input tensors, which is
+the safest choice for the execution type:
+
+.. code-block:: cpp
+
+ #include <ATen/autocast_mode.h>
+
+ Tensor my_multiple_input_op_autocast(const Tensor& t0, const Tensor& t1) {
+ c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
+ // The required at::kHalf argument is an optimistic initial guess.
+ auto exec_type = at::autocast::promote_type(at::kHalf, t0, t1);
+ return my_multiple_input_op(at::autocast::cached_cast(exec_type, t0),
+ at::autocast::cached_cast(exec_type, t1));
+ }
+
+If your custom op is :ref:`autograd-enabled <autograd-support>`, you only need to write and register
+an autocast wrapper for the same name onto which the autograd wrapper is registered.
+For example, if you wanted an autocast wrapper for the ``myadd`` function shown
+in the autograd section, all you'd need is
+
+.. code-block:: cpp
+
+ Tensor myadd_autocast(const Tensor& self, const Tensor& other) {
+ c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
+ return myadd(at::autocast::cached_cast(<desired dtype>, self),
+ at::autocast::cached_cast(<desired dtype>, other));
+ }
+
+ TORCH_LIBRARY_IMPL(myops, Autocast, m) {
+ m.impl("myadd", myadd_autocast);
+ }
+
+There are no separate gymnastics to make the backward method autocast compatible.
+However, the backward method defined in your custom autograd function will run in the same
+dtype as autocast sets for the forward method, so you should choose a ``<desired dtype>``
+suitable for both your forward and backward methods.
+
+Batched
+^^^^^^^
+
+Batched tensors allow you to write your code in a per-example manner, and then
+have them be automatically batched when run under a ``vmap`` invocation. The
+API for writing batching rules is currently under development, but once it is
+stabilized, you can add support for ``vmap`` for your operators by registering
+a kernel at the Batched dispatch key.
+
+Tracer
+^^^^^^
+
+The Tracer dispatch key implements support for recording invocations of operators
+into a trace when you run ``torch.jit.trace``. We intend to provide a
+boxed fallback that will implement tracing for arbitrary operations;
+see `issue #41478 `_ to track
+progress.
diff --git a/advanced_source/dispatcher/CMakeLists.txt b/advanced_source/dispatcher/CMakeLists.txt
new file mode 100644
index 00000000000..0ef448a9644
--- /dev/null
+++ b/advanced_source/dispatcher/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
+project(dispatcher)
+
+find_package(Torch REQUIRED)
+
+add_library(dispatcher SHARED op.cpp)
+target_compile_features(dispatcher PRIVATE cxx_std_14)
+target_link_libraries(dispatcher "${TORCH_LIBRARIES}")
diff --git a/advanced_source/dispatcher/op.cpp b/advanced_source/dispatcher/op.cpp
new file mode 100644
index 00000000000..c3a90aed448
--- /dev/null
+++ b/advanced_source/dispatcher/op.cpp
@@ -0,0 +1,105 @@
+#include <torch/torch.h>
+#include <torch/script.h>
+
+#include <ATen/NamedTensorUtils.h>
+
+using torch::Tensor;
+using torch::DeviceType;
+using torch::autograd::tensor_list;
+using torch::autograd::AutogradContext;
+
+// BEGIN myadd
+Tensor myadd(const Tensor& self, const Tensor& other) {
+ static auto op = torch::Dispatcher::singleton()
+ .findSchemaOrThrow("myops::myadd", "")
+ .typed<decltype(myadd)>();
+ return op.call(self, other);
+}
+// END myadd
+
+// BEGIN TORCH_LIBRARY
+TORCH_LIBRARY(myops, m) {
+ m.def("myadd(Tensor self, Tensor other) -> Tensor");
+}
+// END TORCH_LIBRARY
+
+// BEGIN myadd_cpu
+Tensor myadd_cpu(const Tensor& self_, const Tensor& other_) {
+ TORCH_CHECK(self_.sizes() == other_.sizes());
+ TORCH_INTERNAL_ASSERT(self_.device().type() == DeviceType::CPU);
+ TORCH_INTERNAL_ASSERT(other_.device().type() == DeviceType::CPU);
+ Tensor self = self_.contiguous();
+ Tensor other = other_.contiguous();
+ Tensor result = torch::empty(self.sizes(), self.options());
+ const float* self_ptr = self.data_ptr<float>();
+ const float* other_ptr = other.data_ptr<float>();
+ float* result_ptr = result.data_ptr<float>();
+ for (int64_t i = 0; i < result.numel(); i++) {
+ result_ptr[i] = self_ptr[i] + other_ptr[i];
+ }
+ return result;
+}
+// END myadd_cpu
+
+// BEGIN TORCH_LIBRARY_IMPL CPU
+TORCH_LIBRARY_IMPL(myops, CPU, m) {
+ m.impl("myadd", myadd_cpu);
+}
+// END TORCH_LIBRARY_IMPL CPU
+
+Tensor myadd_cuda(const Tensor& self, const Tensor& other) {
+ // Insert your CUDA implementation here
+ TORCH_CHECK(0, "CUDA not yet implemented");
+}
+
+// BEGIN TORCH_LIBRARY_IMPL CUDA
+TORCH_LIBRARY_IMPL(myops, CUDA, m) {
+ m.impl("myadd", myadd_cuda);
+}
+// END TORCH_LIBRARY_IMPL CUDA
+
+// BEGIN myadd_autograd
+class MyAddFunction : public torch::autograd::Function<MyAddFunction> {
+ public:
+ static Tensor forward(
+ AutogradContext *ctx, torch::Tensor self, torch::Tensor other) {
+ at::AutoNonVariableTypeMode g;
+ return myadd(self, other);
+ }
+
+ static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
+ auto grad_output = grad_outputs[0];
+ return {grad_output, grad_output};
+ }
+};
+
+Tensor myadd_autograd(const Tensor& self, const Tensor& other) {
+ return MyAddFunction::apply(self, other)[0];
+}
+// END myadd_autograd
+
+// BEGIN TORCH_LIBRARY_IMPL Autograd
+TORCH_LIBRARY_IMPL(myops, Autograd, m) {
+ m.impl("myadd", myadd_autograd);
+}
+// END TORCH_LIBRARY_IMPL Autograd
+
+#if 0
+// BEGIN TORCH_LIBRARY_IMPL Named
+Tensor myadd_named(const Tensor& self, const Tensor& other) {
+ // TODO: shouldn't need to do size check here
+ TORCH_CHECK(self.sizes() == other.sizes());
+ auto maybe_outnames = at::unify_from_right(self.names(), other.names());
+ auto result = ([&]() {
+ at::NoNamesGuard guard;
+ return myadd(self, other);
+ })();
+ at::namedinference::propagate_names_if_nonempty(result, maybe_outnames);
+ return result;
+}
+
+TORCH_LIBRARY_IMPL(myops, Named, m) {
+ m.impl("myadd", myadd_named);
+}
+// END TORCH_LIBRARY_IMPL Named
+#endif
diff --git a/advanced_source/dispatcher/test.py b/advanced_source/dispatcher/test.py
new file mode 100644
index 00000000000..cd35b05a47a
--- /dev/null
+++ b/advanced_source/dispatcher/test.py
@@ -0,0 +1,11 @@
+import torch
+
+torch.ops.load_library("build/libdispatcher.so")
+print(torch.ops.myops.myadd(torch.randn(32, 32), torch.rand(32, 32)))
+"""
+# Doesn't currently work, because Python frontend on torch.ops doesn't
+# support names (for not a good reason?)
+x = torch.randn(32, 32, names=('A', 'B'))
+y = torch.rand(32, 32, names=('A', 'B'))
+print(torch.ops.myops.myadd(x, y))
+"""
diff --git a/advanced_source/extend_dispatcher.rst b/advanced_source/extend_dispatcher.rst
new file mode 100644
index 00000000000..12f15355f5f
--- /dev/null
+++ b/advanced_source/extend_dispatcher.rst
@@ -0,0 +1,380 @@
+Extending dispatcher for a new backend in C++
+=============================================
+
+In this tutorial we will walk through all necessary steps to extend the dispatcher to
+add a new device living outside the ``pytorch/pytorch`` repo and maintain it to keep it in
+sync with native PyTorch devices. Here we'll assume that you're familiar with how
+to `register a dispatched operator in C++ `_ and how to write a
+`custom autograd function `_.
+
+
+.. note::
+
+ This tutorial touches a lot of internal components inside PyTorch which are being actively improved;
+ please expect changes to APIs if you decide to follow this tutorial. We'll keep this tutorial
+ up to date with the latest APIs.
+
+What's a new backend?
+---------------------
+
+Adding a new backend to PyTorch requires a lot of development and maintenance from backend extenders.
+Before adding a new backend, let's first consider a few common use cases and recommended solutions for them:
+
+* If you have new algorithms for an existing PyTorch operator, send a PR to PyTorch.
+* If you want to propose a new operator, send a feature request/PR to PyTorch.
+* If you want to add support for a new device/hardware like Google TPU and customized chips, which often requires using
+ hardware-specific APIs to write kernels, follow this tutorial and add an out-of-tree backend to PyTorch.
+* If you want to add support for existing operators but with a different Tensor layout/representation
+ like sparse and quantized, which requires your kernels to be written in a way that's more efficient
+ given the layout/representation limitation, follow this tutorial and add an out-of-tree backend to PyTorch.
+
+In this tutorial we'll mainly focus on adding a new out-of-tree device. Adding out-of-tree support
+for a different tensor layout might share many common steps with devices, but we haven't seen an example of
+such integrations yet, so it might require additional work from PyTorch to support it.
+
+Get a dispatch key for your backend
+-----------------------------------
+
+PyTorch operators are implemented in C++ and made available in Python frontend through Python bindings.
+The PyTorch dispatcher divides the implementation of an operator into multiple kernels, each of which is
+associated with a specific dispatch key. Supporting a new backend in PyTorch essentially means writing
+a kernel for each PyTorch operator in C++ and then registering them to a dispatch key representing your
+customized backend in the dispatcher.
+
+A dispatch key is your identifier in the dispatcher system. The dispatcher looks at the dispatch keys carried on
+input tensors and calls the right kernel accordingly. PyTorch provides three reserved dispatch keys
+(and their corresponding Autograd keys) for prototyping out-of-tree backend extensions:
+
+* PrivateUse1/AutogradPrivateUse1
+* PrivateUse2/AutogradPrivateUse2
+* PrivateUse3/AutogradPrivateUse3
+
+You can choose any of the keys above to prototype your customized backend.
+To create a Tensor on the ``PrivateUse1`` backend, you need to set the dispatch key in the ``TensorImpl`` constructor.
+
+.. code-block:: cpp
+
+ /* Example TensorImpl constructor */
+ TensorImpl(
+ Storage&& storage,
+ DispatchKeySet ks,
+ const caffe2::TypeMeta data_type);
+
+ // To create a TensorImpl on PrivateUse1 backend, pass in the following ks to TensorImpl creation.
+ DispatchKeySet ks = c10::DispatchKeySet{c10::DispatchKey::PrivateUse1, c10::DispatchKey::AutogradPrivateUse1};
+
+
+Note that the ``TensorImpl`` class above assumes your Tensor is backed by a storage like CPU/CUDA. We also
+provide ``OpaqueTensorImpl`` for backends without a storage, and you might need to tweak/override certain
+methods to fit your customized hardware.
+One example in the pytorch repo is the `Vulkan TensorImpl `_.
+
+
+.. note::
+ Once the prototype is done and you plan to do regular releases for your backend extension, please feel free to
+ submit a PR to ``pytorch/pytorch`` to reserve a dedicated dispatch key for your backend.
+
+
+Get the full list of PyTorch operators
+--------------------------------------
+
+PyTorch provides a full list of extensible C++ operators in generated file
+``build/aten/src/ATen/RegistrationDeclarations.h``.
+This file is only available after building PyTorch from source.
+Here's a snippet of the file:
+
+.. code-block:: cpp
+
+ Tensor abs(const Tensor & self); // {"schema": "aten::abs(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+ Tensor & abs_(Tensor & self); // {"schema": "aten::abs_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "True", "default": "True"}
+ Tensor & abs_out(Tensor & out, const Tensor & self); // {"schema": "aten::abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+ Tensor absolute(const Tensor & self); // {"schema": "aten::absolute(Tensor self) -> Tensor", "dispatch": "False", "default": "False"}
+ Tensor & absolute_(Tensor & self); // {"schema": "aten::absolute_(Tensor(a!) self) -> Tensor(a!)", "dispatch": "False", "default": "False"}
+ Tensor & absolute_out(Tensor & out, const Tensor & self); // {"schema": "aten::absolute.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "False", "default": "False"}
+ Tensor angle(const Tensor & self); // {"schema": "aten::angle(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+ Tensor & angle_out(Tensor & out, const Tensor & self); // {"schema": "aten::angle.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)", "dispatch": "True", "default": "False"}
+ Tensor sgn(const Tensor & self); // {"schema": "aten::sgn(Tensor self) -> Tensor", "dispatch": "True", "default": "True"}
+
+There are multiple fields associated with a single operator. Let's break it down using ``abs_out`` as an example:
+
+* ``Tensor & abs_out(Tensor & out, const Tensor & self);`` is the C++ signature of the operator, your C++
+ kernel should match this signature exactly.
+* ``aten::abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)`` is the unique schema representing the operator,
+ which also contains aliasing and mutation annotations compared to the C++ signature. This is the unique identifier
+ the dispatcher uses to find an operator.
+* ``dispatch`` and ``default`` are boolean fields that provide information about what native PyTorch kernels
+ can do, and thus imply whether backend extenders are required to implement the kernel.
+ More details can be found in :ref:`register kernels for the new backend <register-kernel>`.
+
+
+.. _register-kernel:
+
+Register kernels for the new backend
+------------------------------------
+
+To register your kernels to PyTorch dispatcher, you can use the
+``TORCH_LIBRARY_IMPL`` API described in
+`Registering a Dispatched Operator in C++ `_:
+
+.. code-block:: cpp
+
+ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
+ m.impl(<schema_my_op1>, &my_op1);
+ m.impl(<schema_my_op2>, &my_op2);
+ m.impl(<schema_my_op2_backward>, &my_op2_backward);
+ }
+
+Now let's zoom in on which operators require a kernel from a customized backend and what
+exactly goes inside those kernels.
+
+PyTorch currently has more than 1600 operators and it’s still growing. It’s unrealistic
+for backend extensions to keep up with this speed. Even for native backends like CPU
+or CUDA, it often requires a lot of work to write dedicated kernels for every new op.
+
+Fortunately, some native PyTorch kernels are written in a way that they decompose to
+combination of several known operators. In other words, you only need to implement
+a set of known operators (ops that require registration below) instead of all PyTorch operators.
+
+PyTorch operators can be classified into two categories:
+
+* Ops that require registration: PyTorch native implementation for these ops is backend specific
+ and thus it’s required to provide a kernel for customized backend. Otherwise calling such op
+ on the customized backend will error out.
+ * In ``RegistrationDeclarations.h`` these operators have ``dispatch`` set to True *and* ``default`` set to False
+ in the metadata found in their accompanying comments.
+
+
+* Registration is optional: backend extenders can skip registering to these ops without sacrificing any support.
+ However, if a backend extender wants to override the default kernel provided by PyTorch, they can still
+ register their customized kernel to their backend and the dispatcher will use it for your backend only.
+ For example, current implementation of PyTorch's ``max_pool2d`` returns ``indices`` as part of forward outputs which
+ creates overhead in torch_xla, so torch_xla registers its own kernel for ``max_pool2d`` instead.
+ * In ``RegistrationDeclarations.h`` these operators have ``dispatch`` set to False *or* ``default`` set to True
+ in the metadata found in their accompanying comments.
+
+
+
+Autograd support for the new backend
+------------------------------------
+
+Gradient formulas are mostly purely mathematical and thus are general for all backends.
+PyTorch often registers a kernel to the alias dispatch key Autograd, which means it can be used by all backends.
+
+For these operators you don't have to worry about their derivative formulas,
+you can just write forward definitions for operators in ``RegistrationDeclarations.h`` and PyTorch handles
+backward for you automatically.
+
+.. code-block:: cpp
+
+
+ Tensor my_op1(const Tensor& self, const Tensor& other) {
+ // call your backend-specific APIs to implement my_op so that
+ // it matches PyTorch's native behavior
+ }
+ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
+ m.impl(<schema_my_op1>, &my_op1);
+ }
+
+
+In some cases, PyTorch backward kernel implementations are also device specific so that they can squeeze
+max performance out of each backend. For those operators you’ll see ``op_backward`` showing up in
+``RegistrationDeclarations.h`` as *required registration* as well.
+
+.. code-block:: cpp
+
+
+ Tensor my_op2_backward(const Tensor& self, const Tensor& other) {
+ // call your backend-specific APIs to implement my_op2_backward so that
+ // it matches PyTorch's native behavior
+ }
+
+ // Note backward kernel is still registered to PrivateUse1 instead of AutogradPrivateUse1.
+ // PyTorch will wrap your backward kernel with proper autograd setup and then link to it in
+ // my_op2's AutogradPrivateUse1 kernel.
+ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
+ m.impl(<schema_my_op2>, &my_op2);
+ m.impl(<schema_my_op2_backward>, &my_op2_backward);
+ }
+
+
+In a few *rare* cases, PyTorch’s gradient formula for certain operators may have assumptions that don’t generalize
+for all backends. In those cases backend extenders can optionally override the PyTorch Autograd layer by registering
+a kernel from ``torch::autograd::Function`` to the corresponding dispatch key (for example, AutogradPrivateUse1 if
+you're using PrivateUse1 for your backend):
+
+
+.. code-block:: cpp
+
+
+ class MyAddFunction : public torch::autograd::Function<MyAddFunction> {
+ public:
+ static Tensor forward(AutogradContext *ctx, torch::Tensor self, torch::Tensor other) {
+ at::AutoNonVariableTypeMode g;
+ return myadd(self, other);
+ }
+
+ static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
+ auto grad_output = grad_outputs[0];
+ return {grad_output, grad_output};
+ }
+ };
+
+ Tensor myadd_autograd(const Tensor& self, const Tensor& other) {
+ return MyAddFunction::apply(self, other)[0];
+ }
+
+ // Register the autograd kernel to AutogradPrivateUse1
+ TORCH_LIBRARY_IMPL(aten, AutogradPrivateUse1, m) {
+ m.impl(<myadd_schema>, &myadd_autograd);
+ }
+
+ // Register the inference kernel to PrivateUse1
+ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
+ m.impl(<myadd_schema>, &myadd);
+ }
+
+
+
+With this trick you have full control over both training and inference behavior for the ``my_add`` operator in your backend.
+Here's `an example `_ in the ``pytorch/xla`` repository.
+
+
+Build an extension
+------------------
+
+An out-of-tree backend is supported by adding a C++ extension to PyTorch.
+Once you have kernels and registrations ready, you can build a C++ extension by
+writing a ``setup.py`` script that uses ``setuptools`` to compile C++ code. Here's a simplified example from
+`pytorch/xla repo `_::
+
+ from setuptools import setup
+ from torch.utils.cpp_extension import BuildExtension, CppExtension
+
+ setup(
+ name='torch_xla',
+ ext_modules=[
+ CppExtension(
+ '_XLAC',
+ torch_xla_sources,
+ include_dirs=include_dirs,
+ extra_compile_args=extra_compile_args,
+ library_dirs=library_dirs,
+ extra_link_args=extra_link_args + \
+ [make_relative_rpath('torch_xla/lib')],
+ ),
+ ],
+ cmdclass={
+ 'build_ext': Build, # Build is a derived class of BuildExtension
+ }
+ # more configs...
+ )
+
+
+See `our C++ extension tutorial `_
+for more details.
+
+
+Custom operator support
+-----------------------
+
+Your new backend should work seamlessly with
+`customized operators extended in python `_
+without writing any new kernels as long as the customized operator is composed of existing
+PyTorch operators (which are already supported by your backend).
+
+For `custom operators extended in C++ `_, they often come with a
+`backend specific C++ kernel implementation e.g. nms kernel in torchvision `_
+as well as `a customized Python API e.g. torch.ops.torchvision.nms `_.
+To support these operators, backend extenders will need to write a C++ kernel for their backend and properly
+register it to the corresponding namespace in the dispatcher, similar to supporting PyTorch native operators.
+Alternatively, you could also add a customized API in your extension, e.g. ``torch_xla.core.functions.nms``, for
+these ad hoc requests.
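+
+As a rough sketch (the kernel body below is only a placeholder, and the schema it
+implements is torchvision's ``nms``, not something defined in this tutorial),
+registering a backend kernel for such an operator follows the same pattern as for
+native operators:
+
+.. code-block:: cpp
+
+   #include <torch/library.h>
+
+   at::Tensor nms_privateuse1(
+       const at::Tensor& dets,
+       const at::Tensor& scores,
+       double iou_threshold) {
+     // Call into your backend-specific implementation here.
+     TORCH_CHECK(false, "nms is not implemented in this sketch");
+     return dets;  // unreachable; keeps the compiler happy
+   }
+
+   TORCH_LIBRARY_IMPL(torchvision, PrivateUse1, m) {
+     m.impl("nms", nms_privateuse1);
+   }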
+
+JIT support
+-----------
+
+As we mentioned in `Registering a Dispatched Operator in C++ `_, kernels registered through the ``m.impl()`` API
+support being called in both unboxed and boxed ways. In other words, your customized backend can also work with our
+JIT tracing/scripting frontend just like in-tree backends such as CPU or CUDA do. You could potentially also write specialized optimization
+passes for your backend on a JIT graph, but we will not discuss that here since we haven't finalized the integration point
+in JIT; the current backend support will focus on the eager frontend for now.
+
+
+Testing your backend against native PyTorch backends
+----------------------------------------------------
+
+PyTorch lets tests run on multiple device types using its `generic device type testing framework `_.
+You can find details about `how tests use it `_
+and information about `how to add a new device type