diff --git a/.github/workflows/bot-label-lgtm.yaml b/.github/workflows/bot-label-lgtm.yaml deleted file mode 100644 index 27f50375cdc..00000000000 --- a/.github/workflows/bot-label-lgtm.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# This workflow adds the community approval label ("lgtm") to pull requests. It -# does *not* indicate maintainer approval. This a way to visually highlight that -# someone in the world thinks the pull request is ready for further review. This -# event is triggered by a pull request approval, or simply a comment that -# contains the text "lgtm". -# Webhook events: Issue comments, Pull request reviews -name: Community approval -on: - repository_dispatch: - # From: issue_comment, pull_request_review - types: [created, edited, submitted] - -jobs: - lgtm-comment: - # Check the comment. contains() is case-insensitive. - if: >- - ${{ github.actor == 'tfdocsbot' && - contains(github.event.client_payload.comment.body, 'LGTM') }} - runs-on: ubuntu-latest - steps: - - name: Add label - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ISSUE_URL: ${{ github.event.client_payload.comment.issue_url }} - run: | - curl -X POST \ - -H "Accept: application/vnd.github.v3+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - "${ISSUE_URL}/labels" \ - --data '{"labels":["lgtm"]}' - - review-approval: - # Check the pull request review. - if: >- - ${{ github.actor == 'tfdocsbot' && - contains(github.event.client_payload.review.state, 'approved') }} - runs-on: ubuntu-latest - steps: - - name: Add label - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ISSUE_URL: ${{ github.event.client_payload.pull_request.issue_url }} - run: | - curl -X POST \ - -H "Accept: application/vnd.github.v3+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - "${ISSUE_URL}/labels" \ - --data '{"labels":["lgtm"]}' diff --git a/.github/workflows/bot-nightly.yaml b/.github/workflows/bot-nightly.yaml deleted file mode 100644 index a0595c74a0b..00000000000 --- a/.github/workflows/bot-nightly.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# Nightly jobs run by a bot collaborator. -name: Nightly jobs -on: - repository_dispatch: - types: [nightly] - -jobs: - snapshot-source: - name: Update Keras guides - if : ${{ github.actor == 'tfdocsbot' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - repository: keras-team/keras-io - path: keras-io - - uses: actions/checkout@v2 - with: - # tensorflow/docs branch to save generated notebooks. - ref: snapshot-keras - path: docs - - name: Set up repo - run: | - # Set commit author. - git config --global user.name "$GITHUB_ACTOR" - git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.8' - - name: Install requirements - run: | - python3 -m pip install -U pip - python3 -m pip install -U -r keras-io/requirements.txt - python3 -m pip install -U git+https://github.com/tensorflow/docs - - name: Generate Keras notebooks - run: | - # Autogen requires formated code - echo "[${GITHUB_WORKFLOW}] Format Python files ..." - python -m black keras-io/guides/ - mkdir -p keras-io/tf # Make sure output dir exists. - cd keras-io/scripts/ - echo "[${GITHUB_WORKFLOW}] Generate Keras guides ..." - python3 autogen.py generate_tf_guides - echo "[${GITHUB_WORKFLOW}] Format notebooks ..." 
- python3 -m tensorflow_docs.tools.nbfmt ../tf/ - - name: Sync docs repo - env: - KERAS_GUIDES_DIR: site/en/guide/keras/ - run: | - rsync --archive --del --checksum ./keras-io/tf/ "./docs/${KERAS_GUIDES_DIR}" - cd docs - if [[ -z $(git status -s | grep "$KERAS_GUIDES_DIR") ]]; then - echo "[${GITHUB_WORKFLOW}] No Keras guides updated, exiting." - exit 0 - fi - # Match timestamp format to other snapshot messages. - fmt_rfc7231="%a, %d %b %Y %H:%M:%S %Z" - TIMESTAMP_STR=$(TZ=GMT date +"$fmt_rfc7231") - - git add "./${KERAS_GUIDES_DIR}" - git commit -m "Keras guides snapshot: ${TIMESTAMP_STR}" - # Push to current branch. - echo "[${GITHUB_WORKFLOW}] Push changes to repo ..." - git push origin diff --git a/.github/workflows/bot-pr-fix.yaml b/.github/workflows/bot-pr-fix.yaml deleted file mode 100644 index a8ead3aa7ae..00000000000 --- a/.github/workflows/bot-pr-fix.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Automatically add commits to fix pull requests. This workflow must initiate -# from an authenticated bot repo collaborator. Check for opt-out label. -# Webhook events: Pull requests -name: Auto-fix pull request -on: - repository_dispatch: - types: [opened, synchronize] - -jobs: - nbfmt: - # Check for opt-out label. - if: >- - ${{ github.actor == 'tfdocsbot' && - !contains(github.event.client_payload.pull_request.labels.*.name, 'nbfmt-disable') }} - runs-on: ubuntu-latest - steps: - - name: Set up Python - uses: actions/setup-python@v2 - - name: Install tensorflow-docs - run: python3 -m pip install -U git+https://github.com/tensorflow/docs - - name: Fetch pull request branch - uses: actions/checkout@v2 - with: - # Head repo is the user's fork. Ref is the branch name. - repository: ${{ github.event.client_payload.pull_request.head.repo.full_name }} - ref: ${{ github.event.client_payload.pull_request.head.ref }} - - name: Fetch base master branch - run: git fetch -u "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" master:master - - name: Format notebooks - run: | - # Only want notebooks modified in this pull request. - readarray -t changed_files < <(git diff --name-only master | grep '\.ipynb$' || true) - if [[ ${#changed_files[@]} == 0 ]]; then - echo "No notebooks modified in this pull request." - exit 0 - fi - python3 -m tensorflow_docs.tools.nbfmt "${changed_files[@]}" - - if [[ -z $(git ls-files --modified) ]]; then - echo "Notebooks already formatted." - exit 0 - fi - # Set author and commit. - git config --global user.name "$GITHUB_ACTOR" - git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" - git commit -am "nbfmt" - # Push to the pull request branch submitted by head. 
- git push diff --git a/.github/workflows/bot-pr-new.yaml b/.github/workflows/bot-pr-new.yaml index 7f2c6164832..13724cc14f0 100644 --- a/.github/workflows/bot-pr-new.yaml +++ b/.github/workflows/bot-pr-new.yaml @@ -6,8 +6,15 @@ on: repository_dispatch: types: [opened, reopened] +permissions: + contents: read # to fetch code (actions/checkout) + jobs: comment-welcome: + permissions: + contents: read # to fetch code (actions/checkout) + pull-requests: write # to comment on pull-request + if: ${{ github.actor == 'tfdocsbot' }} runs-on: ubuntu-latest steps: @@ -15,7 +22,7 @@ jobs: uses: actions/checkout@v2 with: repository: ${{ github.event.client_payload.pull_request.head.repo.full_name }} - ref: ${{ github.event.client_payload.pull_request.head.ref }} + ref: ${{ github.event.client_payload.pull_request.head.sha }} - name: Fetch base master branch run: git fetch -u "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" master:master - name: Create message diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 00000000000..0ca76b0677e --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,49 @@ +# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# +# You can adjust the behavior by modifying this file. +# For more information, see: +# https://github.com/actions/stale +name: Mark stale issues and pull requests + +on: + schedule: + # Scheduled to run at 1.30 UTC everyday + - cron: '30 1 * * *' + workflow_dispatch: + +jobs: + stale: + + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@v9 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-issue-stale: 14 + days-before-issue-close: 14 + stale-issue-label: "status:stale" + close-issue-reason: not_planned + any-of-labels: "awaiting-contributor-response,cla:no" + stale-issue-message: > + Marking this issue as stale since it has been open for 14 days with no activity. + This issue will be closed if no further activity occurs. + close-issue-message: > + This issue was closed because it has been inactive for 28 days. + Please post a new issue if you need further assistance. Thanks! + days-before-pr-stale: 14 + days-before-pr-close: 14 + stale-pr-label: "status:stale" + stale-pr-message: > + Marking this pull request as stale since it has been open for 14 days with no activity. + This PR will be closed if no further activity occurs. + close-pr-message: > + This pull request was closed because it has been inactive for 28 days. + Please open a new pull request if you need further assistance. Thanks! + # Label that can be assigned to issues to exclude them from being marked as stale + exempt-issue-labels: 'override-stale' + # Label that can be assigned to PRs to exclude them from being marked as stale + exempt-pr-labels: "override-stale" diff --git a/CODEOWNERS b/CODEOWNERS index 42fd773cab2..d4d2932d8bc 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,11 +1,14 @@ # https://help.github.com/articles/about-codeowners/ -# Last matching pattern takes preecedence. +# Last matching pattern takes precedence. # Default owners for everything in repo. 
-* @lamberta @MarkDaoust @8bitmp3 +* @tensorflow/docs-team -# Docs -/site/en/guide/keras/ @fchollet @lamberta @MarkDaoust @8bitmp3 +# Install +/site/en/install/ @haifeng-jin @MarkDaoust @8bitmp3 # Community -/site/en/community/ @ewilderj @lamberta @theadactyl @joanafilipa +/site/en/community/ @ewilderj @theadactyl @joanafilipa + +# Hub +/site/en/hub @gustheman \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1559b721f51..6f301eab782 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,9 +6,7 @@ This guide shows how to make contributions to [tensorflow.org](https://www.tenso See the [TensorFlow docs contributor guide](https://www.tensorflow.org/community/contribute/docs) -for guidance. For questions, the -[docs@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs) -mailing list is available. +for guidance. For questions, check out [TensorFlow Forum](https://discuss.tensorflow.org/). Questions about TensorFlow usage are better addressed on [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) or the diff --git a/LICENSE b/LICENSE index 4862420c023..08026f1ac8e 100644 --- a/LICENSE +++ b/LICENSE @@ -201,3 +201,28 @@ Copyright 2018 The TensorFlow Authors. All rights reserved. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + +--------------------------- + +Where indicated, some files are also distributed under the MIT License: + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index fa4266dd2dc..c3f79ccabe2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ -recursive-include tools/tensorflow_docs/api_generator/gen_java/ * \ No newline at end of file +global-include **/templates/* +global-include *.sh \ No newline at end of file diff --git a/OWNERS_METADATA b/OWNERS_METADATA deleted file mode 100644 index 1460eaaba9e..00000000000 --- a/OWNERS_METADATA +++ /dev/null @@ -1,20 +0,0 @@ -# Text proto format: devtools_piper.OwnersMetadataFile (go/owners_metadata) -# See go/silos-lite for details on how to use this file for read access control. -silo_acl { - # Acknowledgement that silo won't contain privacy sensitive data, like PII. - silo_will_contain_privacy_sensitive_data: false - - # This retains normal access for regular engineers and other accounts. - access_type: OPEN - - # This is equivalent to above. 
Currently, default_ganpati_group is a - # mandatory field in SiloAcl. We plan to make it optional in case - # access_type is OPEN. - default_ganpati_group: "piper-group-default-access" - - # Ganpati1 groups that are granted *read* access in addition to the default - # group above. - allowed_ganpati_groups: "restricted-dev-build" - allowed_ganpati_groups: "job-0000396906033-webdevelopmentdevelopermarketingwebprojects" - allowed_ganpati_groups: "job-0000551608589-smetechnicalwriterii" -} diff --git a/README.md b/README.md index 7b94ce5f90f..66b6d3fb065 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ To file a docs issue, use the issue tracker in the [tensorflow/tensorflow](https://github.com/tensorflow/tensorflow/issues/new?template=20-documentation-issue.md) repo. And join the TensorFlow documentation contributors on the -[docs@tensorflow.org mailing list](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs). +[TensorFlow Forum](https://discuss.tensorflow.org/). ## Community translations diff --git a/setup.py b/setup.py index 2077516bf4c..404479668b3 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ # ============================================================================== """tensorflow_docs is a package for generating python api-reference docs.""" +import datetime import subprocess import sys @@ -21,22 +22,35 @@ from setuptools import setup project_name = 'tensorflow-docs' -version = '0.0.0.dev0' + + +def get_version() -> str: + ts = int( + subprocess.check_output(['git', 'log', '-1', '--format=%ct', 'tools']) + .decode('utf-8') + .strip() + ) + dt = datetime.datetime.utcfromtimestamp(ts) + sec = 60 * 60 * dt.hour + 60 * dt.minute + dt.second + + # calver.org + return f'{dt.year}.{dt.month}.{dt.day}.{sec}' + + +version = get_version() DOCLINES = __doc__.split('\n') REQUIRED_PKGS = [ 'astor', 'absl-py', - 'protobuf>=3.14', + 'jinja2', + 'nbformat', + 'protobuf>=3.12', 'pyyaml', ] -# Dataclasses is in-built from py >=3.7. This version is a backport for py 3.6. -if (sys.version_info.major, sys.version_info.minor) == (3, 6): - REQUIRED_PKGS.append('dataclasses') - -VIS_REQURE = [ +VIS_REQUIRE = [ 'numpy', 'PILLOW', 'webp', @@ -45,6 +59,7 @@ # https://setuptools.readthedocs.io/en/latest/setuptools.html#new-and-changed-setup-keywords setup( name=project_name, + python_requires='>=3.9', version=version, description=DOCLINES[0], long_description='\n'.join(DOCLINES[2:]), @@ -57,7 +72,7 @@ package_dir={'': 'tools'}, scripts=[], install_requires=REQUIRED_PKGS, - extras_require={'vis': VIS_REQURE}, + extras_require={'vis': VIS_REQUIRE}, classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', @@ -65,7 +80,7 @@ 'Topic :: Scientific/Engineering :: Artificial Intelligence', ], keywords='tensorflow api reference', - # Include_package_data is required for setup.py to recognize the MAINFEST.in + # Include_package_data is required for setup.py to recognize the MANIFEST.in # https://python-packaging.readthedocs.io/en/latest/non-code-files.html include_package_data=True, ) diff --git a/site/en/README.md b/site/en/README.md new file mode 100644 index 00000000000..28dc0cce7d4 --- /dev/null +++ b/site/en/README.md @@ -0,0 +1,50 @@ +# TensorFlow docs + +These are the source files for the core TensorFlow +[guide](https://www.tensorflow.org/guide), +[tutorials](https://www.tensorflow.org/tutorials), and other technical docs. 
+Please read the +[contributor guide](https://www.tensorflow.org/community/contribute) +to submit patches to the TensorFlow documentation and code. + +## TensorFlow ecosystem projects + +In addition to the core TensorFlow docs, +[tensorflow.org](https://www.tensorflow.org) hosts documentation for many +[libraries and extensions](https://www.tensorflow.org/resources/libraries-extensions). +These docs are sourced from separate project repos and where pull requests can +be sent. The following is a list of TensorFlow documentation projects published +on the website and a link to their source files: + +tensorflow.org project | GitHub docs location +-----------------------|--------------------- +[/addons](https://www.tensorflow.org/addons) | https://github.com/tensorflow/addons/tree/master/docs +[/agents](https://www.tensorflow.org/agents) | https://github.com/tensorflow/agents/tree/master/docs +[/cloud](https://www.tensorflow.org/cloud) | https://github.com/tensorflow/cloud/tree/master/g3doc +[/datasets](https://www.tensorflow.org/datasets) | https://github.com/tensorflow/datasets/tree/master/docs +[/decision_forests](https://www.tensorflow.org/decision_forests) | https://github.com/tensorflow/decision-forests/tree/main/documentation +[/federated](https://www.tensorflow.org/federated) | https://github.com/tensorflow/federated/tree/main/docs +[/graphics](https://www.tensorflow.org/graphics) | https://github.com/tensorflow/graphics/tree/master/tensorflow_graphics/g3doc +[/hub](https://www.tensorflow.org/hub) | https://github.com/tensorflow/hub/tree/master/docs +[/io](https://www.tensorflow.org/io) | https://github.com/tensorflow/io/tree/master/docs/ +[/js](https://www.tensorflow.org/js) | https://github.com/tensorflow/tfjs-website/tree/master/docs +[/jvm](https://www.tensorflow.org/jvm) | https://github.com/tensorflow/java/tree/master/docs +[/lattice](https://www.tensorflow.org/lattice) | https://github.com/tensorflow/lattice/tree/master/docs +[/lite](https://www.tensorflow.org/lite) | https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/g3doc +[/mlir](https://www.tensorflow.org/mlir) | https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/mlir/g3doc +[/model_optimization](https://www.tensorflow.org/model_optimization) | https://github.com/tensorflow/model-optimization/tree/master/tensorflow_model_optimization/g3doc +[/neural_structured_learning](https://www.tensorflow.org/neural_structured_learning) | https://github.com/tensorflow/neural-structured-learning/tree/master/g3doc +[/probability](https://www.tensorflow.org/probability) | https://github.com/tensorflow/probability/tree/main/tensorflow_probability/g3doc +[/quantum](https://www.tensorflow.org/quantum) | https://github.com/tensorflow/quantum/tree/master/docs +[/ranking](https://www.tensorflow.org/ranking) | https://github.com/tensorflow/ranking/tree/master/docs +[/recommenders](https://www.tensorflow.org/recommenders) | https://github.com/tensorflow/recommenders/tree/main/docs +[/responsible_ai/fairness_indicators](https://www.tensorflow.org/responsible_ai/fairness_indicators/guide) | https://github.com/tensorflow/fairness-indicators/tree/master/g3doc 
+[/responsible_ai/model_card_toolkit](https://www.tensorflow.org/responsible_ai/model_card_toolkit/guide) | https://github.com/tensorflow/model-card-toolkit/tree/main/model_card_toolkit/documentation +[/responsible_ai/model_remediation](https://www.tensorflow.org/responsible_ai/model_remediation) | https://github.com/tensorflow/model-remediation/tree/master/docs +[/responsible_ai/privacy](https://www.tensorflow.org/responsible_ai/privacy/guide) | https://github.com/tensorflow/privacy/tree/master/g3doc +[/tensorboard](https://www.tensorflow.org/tensorboard) | https://github.com/tensorflow/tensorboard/tree/master/docs +[/guide/keras](https://www.tensorflow.org/guide/keras/) | https://github.com/keras-team/keras-io/tree/master/guides +[/text](https://www.tensorflow.org/text) | https://github.com/tensorflow/text/tree/master/docs +[/tfx](https://www.tensorflow.org/tfx) | https://github.com/tensorflow/tfx/tree/master/docs +[/tfx/guide/serving](https://www.tensorflow.org/tfx/guide/serving) | https://github.com/tensorflow/serving/tree/master/tensorflow_serving/g3doc +[/xla](https://www.tensorflow.org/xla) | https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/xla/g3doc diff --git a/site/en/about/_menu_toc.yaml b/site/en/about/_menu_toc.yaml index d5fe88d3745..8f34ca8089d 100644 --- a/site/en/about/_menu_toc.yaml +++ b/site/en/about/_menu_toc.yaml @@ -5,5 +5,3 @@ toc: path: /about/ - label: "Case studies" path: /about/case-studies/ - - label: "AI Service Partners" - path: /partners/ diff --git a/site/en/about/bib.md b/site/en/about/bib.md index a45eec3a40c..16da75adc3e 100644 --- a/site/en/about/bib.md +++ b/site/en/about/bib.md @@ -43,7 +43,7 @@ title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems}, url={https://www.tensorflow.org/}, note={Software available from tensorflow.org}, author={ - Mart\'{\i}n~Abadi and + Mart\'{i}n~Abadi and Ashish~Agarwal and Paul~Barham and Eugene~Brevdo and diff --git a/site/en/addons/README.md b/site/en/addons/README.md deleted file mode 100644 index 3f0ca09e73f..00000000000 --- a/site/en/addons/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow SIG Addons - -These docs are available here: https://github.com/tensorflow/addons/tree/master/docs diff --git a/site/en/agents/README.md b/site/en/agents/README.md deleted file mode 100644 index 468efd02dcd..00000000000 --- a/site/en/agents/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! 
- -# TensorFlow Agents - -These docs are available here: https://github.com/tensorflow/agents/tree/master/docs diff --git a/site/en/community/_toc.yaml b/site/en/community/_toc.yaml index 074ecf72531..1a81d38cb01 100644 --- a/site/en/community/_toc.yaml +++ b/site/en/community/_toc.yaml @@ -34,5 +34,7 @@ toc: - heading: "Community" - title: "Contribute to the community" path: /community/contribute/community + - title: "Contribute to SIGs" + path: /community/contribute/sigs - title: "RFC process" path: /community/contribute/rfc_process diff --git a/site/en/community/contribute/code.md b/site/en/community/contribute/code.md index 35f2417046a..2f71f12d7fe 100644 --- a/site/en/community/contribute/code.md +++ b/site/en/community/contribute/code.md @@ -2,14 +2,24 @@ Whether you are adding a loss function, improving test coverage, or writing an RFC for a major design change, this portion of the contributor guide will help -you get started. Thank you for work and interest in improving TensorFlow. +you get started. Thank you for your work and interest in improving TensorFlow. ## Before you get started -Before you contribute source code to a TensorFlow project, please review the `CONTRIBUTING.md` file in the GitHub repo of the project. (For example, see the -[CONTRIBUTING.md file for the core TensorFlow repo](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md).) All code contributors are required to sign a [Contributor License Agreement](https://cla.developers.google.com/clas) (CLA). - -To avoid duplicating work, please review [current](https://github.com/tensorflow/community/tree/master/rfcs) or [proposed](https://github.com/tensorflow/community/labels/RFC%3A%20Proposed) RFCs and contact the developers on the TensorFlow forums ([developers@tensorflow.org](https://groups.google.com/u/1/a/tensorflow.org/g/developers)) before you start work on a non-trivial feature. We are somewhat selective when deciding to add new functionality, and the best way to contribute and help the project is to work on known issues. +Before you contribute source code to a TensorFlow project, please review the +`CONTRIBUTING.md` file in the GitHub repo of the project. For example, see the +[CONTRIBUTING.md](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) +file in the core TensorFlow repo. All code contributors are required to sign a +[Contributor License Agreement](https://cla.developers.google.com/clas) (CLA). + +To avoid duplicating work, please review +[current](https://github.com/tensorflow/community/tree/master/rfcs) or +[proposed](https://github.com/tensorflow/community/labels/RFC%3A%20Proposed) +RFCs and contact the developers on the TensorFlow forums +([developers@tensorflow.org](https://groups.google.com/u/1/a/tensorflow.org/g/developers)) +before you start work on a non-trivial feature. We are somewhat selective when +deciding to add new functionality, and the best way to contribute and help the +project is to work on known issues. ## Issues for new contributors @@ -22,14 +32,20 @@ workflow, and for the core devs to become acquainted with the contributor. 
- [good first issue](https://github.com/tensorflow/tensorflow/labels/good%20first%20issue) - [contributions welcome](https://github.com/tensorflow/tensorflow/labels/stat%3Acontributions%20welcome) -If you are interested in recruiting a team to help tackle a large-scale problem or a new feature, please email the [developers@ group](https://groups.google.com/a/tensorflow.org/forum/#!forum/developers) and review our current list of RFCs. - +If you are interested in recruiting a team to help tackle a large-scale problem +or a new feature, please email the +[developers@ group](https://groups.google.com/a/tensorflow.org/g/developers) +and review our current list of RFCs. ## Code review -New features, bug fixes, and any other changes to the code base are subject to code review. +New features, bug fixes, and any other changes to the code base are subject to +code review. -Reviewing code contributed to the project as pull requests is a crucial component of TensorFlow development. We encourage anyone to start reviewing code submitted by other developers, especially if the feature is something that you are likely to use. +Reviewing code contributed to the project as pull requests is a crucial +component of TensorFlow development. We encourage anyone to start reviewing code +submitted by other developers, especially if the feature is something that you +are likely to use. Here are some questions to keep in mind during the code review process: @@ -44,36 +60,47 @@ Here are some questions to keep in mind during the code review process: ## Test and improve test coverage -High-quality unit testing is a corner-stone of the TensorFlow development process. For this purpose, we use Docker images. The test functions are appropriately named, and are responsible for checking the validity of algorithms as well as different options of the code. +High-quality unit testing is a corner-stone of the TensorFlow development +process. For this purpose, we use Docker images. The test functions are +appropriately named, and are responsible for checking the validity of algorithms +as well as different options of the code. -All new features and bug fixes *must* include adequate test coverage. We also welcome contributions of new test cases or improvements to existing tests. If you discover that our existing tests are not complete — even if that is not currently causing a bug — please file an issue and, if possible, a pull request. +All new features and bug fixes *must* include adequate test coverage. We also +welcome contributions of new test cases or improvements to existing tests. If +you discover that our existing tests are not complete — even if that is not +currently causing a bug — please file an issue and, if possible, a pull request. -For the specific details of testing procedures in each TensorFlow project, see the `README.md` and `CONTRIBUTING.md` files in the project repo on GitHub. +For the specific details of testing procedures in each TensorFlow project, see +the `README.md` and `CONTRIBUTING.md` files in the project repo on GitHub. Of particular concerns in *adequate testing*: -* Is *every public function and class* tested? -* Are a *reasonable set of parameters*, their values, value types, and combinations tested? -* Do the tests validate that the *code is correct*, and that it is *doing what the documentation says* the code is intended to do? +* Is *every public function and class* tested? 
+* Are a *reasonable set of parameters*, their values, value types, and + combinations tested? +* Do the tests validate that the *code is correct*, and that it is *doing what + the documentation says* the code is intended to do? * If the change is a bug fix, is a *non-regression test* included? * Do the tests *pass the continuous integration* build? -* Do the tests *cover every line of code?* If not, are the exceptions reasonable and explicit? - -If you find any problems, please consider helping the contributor understand those problems and resolve them. +* Do the tests *cover every line of code?* If not, are the exceptions + reasonable and explicit? +If you find any problems, please consider helping the contributor understand +those problems and resolve them. ## Improve error messages or logs -We welcome contributions that improve error messages and logging. - +We welcome contributions that improve error messages and logging. ## Contribution workflow -Code contributions—bug fixes, new development, test improvement—all follow a GitHub-centered workflow. To participate in TensorFlow development, set up a GitHub account. Then: +Code contributions—bug fixes, new development, test improvement—all follow a +GitHub-centered workflow. To participate in TensorFlow development, set up a +GitHub account. Then: -1. Fork the repo you plan to work on. - Go to the project repo page and use the *Fork* button. This will create a copy of the - repo, under your username. (For more details on how to fork a repository see +1. Fork the repo you plan to work on. Go to the project repo page and use the + *Fork* button. This will create a copy of the repo, under your username. + (For more details on how to fork a repository see [this guide](https://help.github.com/articles/fork-a-repo/).) 2. Clone down the repo to your local system. @@ -96,36 +123,46 @@ Code contributions—bug fixes, new development, test improvement—all follow a `$ git push origin branch-name` -7. Open a *Pull Request* (PR). Go to the original project repo on GitHub. There will be a message about your recently pushed branch, asking if you would like to open a pull request. Follow the prompts, *compare across repositories*, and submit the PR. This will send an email to the committers. You may want to consider sending an email to the mailing list for more visibility. (For more details, see the [GitHub guide on PRs](https://help.github.com/articles/creating-a-pull-request-from-a-fork). +7. Open a *Pull Request* (PR). Go to the original project repo on GitHub. There + will be a message about your recently pushed branch, asking if you would + like to open a pull request. Follow the prompts, *compare across + repositories*, and submit the PR. This will send an email to the committers. + You may want to consider sending an email to the mailing list for more + visibility. (For more details, see the + [GitHub guide on PRs](https://help.github.com/articles/creating-a-pull-request-from-a-fork). -8. Maintainers and other contributors will *review your PR*. Please participate in the conversation, and try to *make any requested changes*. Once the PR is approved, the code will be merged. +8. Maintainers and other contributors will *review your PR*. Please participate + in the conversation, and try to *make any requested changes*. Once the PR is + approved, the code will be merged. -*Before working on your next contribution*, make sure your local repository is up to date. 
+*Before working on your next contribution*, make sure your local repository is +up to date. -1. Set the upstream remote. (You only have to do this once per project, not every time.) +1. Set the upstream remote. (You only have to do this once per project, not + every time.) `$ git remote add upstream git@github.com:tensorflow/project-repo-name` -2. Switch to the local master branch. +2. Switch to the local master branch. `$ git checkout master` -3. Pull down the changes from upstream. +3. Pull down the changes from upstream. `$ git pull upstream master` -4. Push the changes to your GitHub account. (Optional, but a good practice.) +4. Push the changes to your GitHub account. (Optional, but a good practice.) `$ git push origin master` -5. Create a new branch if you are starting new work. +5. Create a new branch if you are starting new work. `$ git checkout -b branch-name` Additional `git` and GitHub resources: * [Git documentation](https://git-scm.com/documentation) -* [Git development workflow](https://docs.scipy.org/doc/numpy/dev/gitwash/development_workflow.html) +* [Git development workflow](https://docs.scipy.org/doc/numpy/dev/development_workflow.html) * [Resolving merge conflicts](https://help.github.com/articles/resolving-a-merge-conflict-using-the-command-line/). diff --git a/site/en/community/contribute/community.md b/site/en/community/contribute/community.md index 26cb66e6a40..bb2b2035d1b 100644 --- a/site/en/community/contribute/community.md +++ b/site/en/community/contribute/community.md @@ -2,14 +2,28 @@ An open source project isn't just about the code, it's also about the community of users, developers, writers, researchers, and other contributors. You can help grow and support this community. -Please read the TensorFlow [Code and Collaboration governance](https://github.com/tensorflow/community/blob/master/governance/code-and-collaboration.md) +Please read the TensorFlow [Code and Collaboration governance](https://github.com/tensorflow/community/blob/master/governance/code-and-collaboration.md). ## Community support -Many people [ask questions about TensorFlow on Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). Answering those questions and pointing people to the relevant documentation is a great service to the community. +Many people [ask questions about TensorFlow on the TensorFlow Forum](https://discuss.tensorflow.org/). Answering those questions and pointing people to the relevant documentation is a great service to the community. Some users also ask support questions as GitHub issues. We try to discourage this, as GitHub issues are not the best place to ask for technical support. However, if you notice these issues, you are encouraged to answer them and point people to the relevant documentation. +### TensorFlow Forum + +The [TensorFlow Forum](https://discuss.tensorflow.org/) is a central platform for community discussion and support. It brings our community together to share ideas, best practices and use cases related to TensorFlow. We foster an open and welcoming environment according to the [TensorFlow Code of Conduct](https://discuss.tensorflow.org/faq). + +The TensorFlow Forum is organized by categories, subcategories and tags. We encourage you to create an account and follow categories and tags of interest. 
When you create a new post, select the most appropriate [category or subcategory](https://discuss.tensorflow.org/categories) and [tags](https://discuss.tensorflow.org/tags) to help other users find your topic. + +For more information on Discourse features, read the [Discourse New User Guide](https://meta.discourse.org/t/discourse-new-user-guide/96331). + +### Become a Forum expert + +Discourse uses [trust levels](https://blog.discourse.org/2018/06/understanding-discourse-trust-levels/) to reward increasing levels of participation in the forum. The Forum facilitates learning by doing, letting you to collect [badges](https://discuss.tensorflow.org/badges) that are displayed on your profile. This is a great way to be recognized for helping fellow community members. The more you invest in helping community members, the more badges and forum tools you will unlock. + +Certain groups, such as TensorFlow Team members and Machine Learning GDEs, display a special icon for easier identification. + ## Communication The TensorFlow community has a number of formal and informal ways of keeping in touch. @@ -18,95 +32,33 @@ The TensorFlow community has a number of formal and informal ways of keeping in The primary communication about work on TensorFlow happens in the [TensorFlow repositories on GitHub](https://github.com/tensorflow). This is the place to discuss bugs, new features, and in-progress work. - - ### Mailing lists -Mailing lists are reserved for announcements and contributor conversation. They are not intended to provide technical support. - -#### General TensorFlow lists +Most communication happens on the TensorFlow Forum. The following mailing lists are still used for announcements and contributor conversations. Note that they are not intended to provide technical support. * [announce@tensorflow.org](mailto:announce@tensorflow.org) — All major releases and important announcements are sent to this mailing group. We recommend that you join this list if you depend on TensorFlow in any way. -* [discuss@tensorflow.org](mailto:discuss@tensorflow.org) — General discussion about TensorFlow development and direction. * [developers@tensorflow.org](mailto:developers@tensorflow.org) — Discussion for developers who are contributing to TensorFlow. +For more information on project-specific communication, visit the [Contribute to SIGs](https://tensorflow.org/community/contribute/sigs) page. + +### Blog and social media + +The [TensorFlow Blog](http://blog.tensorflow.org/) is full of great content both from our team at Google and the broader community. We'd love to hear what you have to say, so if you would like to submit an article for review, please contact us at tensorflow-blog@google.com. Note that we receive many great submissions, and setting expectations, we can only publish a few. + +On [Twitter](https://twitter.com/tensorflow) we share the latest and greatest from our community, and our [YouTube channel](https://www.youtube.com/tensorflow) has free educational content to help you create, understand and deploy models for a variety of applications. + +## TensorFlow Community Spotlight + +The TensorFlow Community Spotlight Program provides an opportunity to showcase your passion projects using TensorFlow. [Submit your project](https://services.google.com/fb/forms/tensorflowprojectrecognitionform/) for a chance to be featured and recognized on TensorFlow’s Twitter account. 
+ +Follow the [#TFCommunitySpotlight](https://twitter.com/hashtag/TFCommunitySpotlight?src=hashtag_click) hashtag and find out more about past winners [here](https://blog.tensorflow.org/2020/11/tensorflow-community-spotlight-program-update.html). + +## User groups + +[TensorFlow User Groups](https://www.tensorflow.org/community/groups) (or TFUGs, for short) are local communities of developers and researchers around the world. If you don’t have a TFUG in your country or city, we encourage you to start one by reaching out to [tfug-help@tensorflow.org](mailto:tfug-help@tensorflow.org). + +## Events + +The TensorFlow team hosts and supports events all around the world! If your TFUG is planning an upcoming event or meetup, please let our Community know by posting about it on the TensorFlow Forum under the [Events category](https://discuss.tensorflow.org/c/events/27). -#### Project-specific lists - -* [docs@tensorflow.org](mailto:docs@tensorflow.org) — If you are interested in contributing to the TensorFlow documentation, join this mailing list. -* [hub@tensorflow.org](mailto:hub@tensorflow.org) — Discussion and collaboration around TensorFlow Hub. -* [magenta-discuss@tensorflow.org](mailto:magenta-discuss@tensorflow.org) — General discussion about Magenta development and direction. -* [swift@tensorflow.org](mailto:swift@tensorflow.org) — Community and collaboration around Swift for TensorFlow. -* [tensor2tensor@tensorflow.org](mailto:tensor2tensor@tensorflow.org) — Discussion and peer support for Tensor2Tensor. -* [tfjs-announce@tensorflow.org](mailto:tfjs-announce@tensorflow.org) — Announcements of new TensorFlow.js releases. -* [tfjs@tensorflow.org](mailto:tfjs@tensorflow.org) — Discussion and peer support for TensorFlow.js. -* [tflite@tensorflow.org](mailto:tflite@tensorflow.org) — Discussion and peer support for TensorFlow Lite. -* [tfprobability@tensorflow.org](mailto:tfprobability@tensorflow.org) — Discussion and peer support for TensorFlow Probability. -* [tpu-users@tensorflow.org](mailto:tpu-users@tensorflow.org) — Community discussion and support for TPU users. - - -### Blog - -We post regularly to the [TensorFlow Blog](http://blog.tensorflow.org/), with content sourced from both TensorFlow developers and the broader community. If you would like to submit an article for review, please contact the TensorFlow Developer Relations team. - -### Social media - -For news and updates from around the universe of TensorFlow projects, follow [@tensorflow](https://twitter.com/tensorflow) on Twitter. To watch TensorFlow-related content, check out our [YouTube](http://youtube.com/tensorflow/) channel. - -### User groups - -TensorFlow has many communities all over the world! For a complete listing, please refer to the [Community](https://www.tensorflow.org/community/groups) section on the TensorFlow website. - -### Special Interest Groups (SIGs) - -To enable focused collaboration on particular areas of TensorFlow, we host Special Interest Groups (SIGs). SIGs do their work in public. If you want to join and contribute, review the work of the group, and get in touch with the relevant SIG leader. Membership policies vary on a per-SIG basis. - - -#### Current SIGs - -As of January 2019, the current TF-SIGs are: - - - - - - - - - - - - - - - - - - - - - - - - - - -
SIG Addons - Maintains a repository of contributions that conform to well-established API patterns, but implement new functionality not available in core TensorFlow.
SIG Build - Focuses on issues surrounding building, packaging, and distribution of TensorFlow.
SIG IO - Works on support for file systems and formats other than those in core TensorFlow (such as Apache Ignite FS, or Apache Hadoop SequenceFile), as subclasses of tf.data.Dataset and TensorFlow filesystems.
SIG Networking - Maintains network fabrics and protocols not available in core TensorFlow.
SIG TensorBoard - Furthers the development and direction of TensorBoard and its plug-ins.
SIG Rust - Collaborates on the development of TensorFlow's Rust bindings.
- -If you believe there is a strong need for a new SIG, -please read the [SIG playbook]() and get in touch with -the TensorFlow Developer Relations Team. +If you’ve already held your event, please share feedback with us [here](https://docs.google.com/forms/d/e/1FAIpQLSdvb8c2ZFXxS05aX6dpUVZlfYA0WsFFq-sUAzjiohVKAQ1RLw/viewform)! Feel free to share recaps and recordings on the Forum as well. diff --git a/site/en/community/contribute/docs.md b/site/en/community/contribute/docs.md index e055b3601b8..34b1619ca5d 100644 --- a/site/en/community/contribute/docs.md +++ b/site/en/community/contribute/docs.md @@ -24,12 +24,15 @@ To participate in the TensorFlow docs community: * Watch the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. -* Subscribe to [docs@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs). +* Follow the [docs](https://discuss.tensorflow.org/tag/docs) tag on the + [TensorFlow Forum](https://discuss.tensorflow.org/). + ## API reference -To update reference documentation, find the -[source file](https://www.tensorflow.org/code/tensorflow/python/) +For details, use the [TensorFlow API docs contributor guide](docs_ref.md). This +shows you how to find the +[source file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/) and edit the symbol's docstring. Many API reference pages on tensorflow.org include a link to the source file @@ -38,9 +41,6 @@ where the symbol is defined. Docstrings support and can be (approximately) previewed using any Markdown previewer. -For reference documentation quality and how to get involved with doc sprints and -the community, see the -[TensorFlow 2 API Docs advice](https://docs.google.com/document/d/1e20k9CuaZ_-hp25-sSd8E8qldxKPKQR-SkwojYr_r-U/preview). ### Versions and branches @@ -53,9 +53,9 @@ main tensorflow/tensorflow repo. The reference documentation is generated from code comments and docstrings in the source code for -Python, -C++, and -Java. +Python, +C++, and +Java. Previous versions of the TensorFlow documentation are available as [rX.x branches](https://github.com/tensorflow/docs/branches) in the TensorFlow @@ -167,21 +167,21 @@ when you submit your pull request. Add a remote:
-git remote add upstream git@github.com:tensorflow/docs.git
+git remote add upstream git@github.com:tensorflow/docs.git
 
 # View remote repos
 git remote -v
 origin    git@github.com:username/docs.git (fetch)
 origin    git@github.com:username/docs.git (push)
-upstream  git@github.com:tensorflow/docs.git (fetch)
-upstream  git@github.com:tensorflow/docs.git (push)
+upstream  git@github.com:tensorflow/docs.git (fetch)
+upstream  git@github.com:tensorflow/docs.git (push)
 
To update:
 git checkout master
-git pull upstream master
+git pull upstream master
 
 git push  # Push changes to your GitHub account (defaults to origin)
 
diff --git a/site/en/community/contribute/docs_ref.md b/site/en/community/contribute/docs_ref.md index bf4bf8ef2f3..41fce4dde40 100644 --- a/site/en/community/contribute/docs_ref.md +++ b/site/en/community/contribute/docs_ref.md @@ -8,7 +8,7 @@ TensorFlow uses [DocTest](https://docs.python.org/3/library/doctest.html) to test code snippets in Python docstrings. The snippet must be executable Python code. To enable testing, prepend the line with `>>>` (three left-angle brackets). For example, here's a excerpt from the `tf.concat` function in the -[array_ops.py](https://www.tensorflow.org/code/tensorflow/python/ops/array_ops.py) +[array_ops.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/array_ops.py) source file: ``` @@ -45,6 +45,11 @@ def concat(values, axis, name="concat"): Note: TensorFlow DocTest uses TensorFlow 2 and Python 3. +To assess reference documentation quality, see the example section of the +[TensorFlow 2 API Docs advice](https://docs.google.com/document/d/1e20k9CuaZ_-hp25-sSd8E8qldxKPKQR-SkwojYr_r-U/preview). +(Be aware that the Task Tracker on this sheet is no longer in use.) + + ### Make the code testable with DocTest Currently, many docstrings use backticks (```) to identify code. To make the @@ -95,7 +100,7 @@ TensorFlow uses a few customizations to the builtin doctest logic: ``` def NewLayer(): - “””This layer does cool stuff. + """This layer does cool stuff. Example usage: @@ -103,7 +108,7 @@ TensorFlow uses a few customizations to the builtin doctest logic: >>> new_layer = NewLayer(x) >>> new_layer - “”” + """ ``` * *Floating point values*: The TensorFlow doctest extracts float values from @@ -173,7 +178,7 @@ There are two ways to test the code in the docstring locally: * If you are only changing the docstring of a class/function/method, then you can test it by passing that file's path to - [tf_doctest.py](https://www.tensorflow.org/code/tensorflow/tools/docs/tf_doctest.py). + [tf_doctest.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docs/tf_doctest.py). For example:
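A minimal sketch of that invocation, assuming `tf_doctest.py` accepts the target file via a `--file` flag (the flag name and the example path are assumptions for illustration, not taken from this patch):

```bash
# Run TensorFlow DocTest for a single source file, executing the
# docstring snippets with your installed version of TensorFlow.
python tf_doctest.py --file=tensorflow/python/ops/array_ops.py
```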
diff --git a/site/en/community/contribute/docs_style.md b/site/en/community/contribute/docs_style.md
index 9c730c7f100..10f18e52699 100644
--- a/site/en/community/contribute/docs_style.md
+++ b/site/en/community/contribute/docs_style.md
@@ -48,50 +48,94 @@ language after the first backtick group, for example:
 ```
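As an illustration (not part of this patch), a fenced block that names its language right after the opening backticks looks like this:

```bash
# The "bash" tag after the opening backticks tells the site which
# syntax highlighting to apply to this block.
echo "Hello, TensorFlow docs"
```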
 
-### Links in Markdown +### Links in Markdown and notebooks -#### Links between files in this repository +#### Links between files in a repository -Use relative links between files in a repository. This works on -[tensorflow.org](https://www.tensorflow.org) and -[GitHub](https://github.com/tensorflow/docs/tree/master/site/en):
-\[Custom layers\]\(../tutorials/eager/custom_layers.ipynb\) produces -[Custom layers](https://www.tensorflow.org/tutorials/eager/custom_layers) on the -site. +Use relative links between files in a single GitHub repository. Include the file +extension. -#### Links to API documentation - -API links are converted when the site is published. To link to a symbol's API -reference page, enclose the full symbol path in backticks: +For example, **this file you're reading** is from the +[https://github.com/tensorflow/docs](https://github.com/tensorflow/docs) +repository. Therefore, it can use relative paths to link to other files in the same +repository like this: -* `tf.data.Dataset` produces - [`tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) +* \[Basics\]\(../../guide/basics.ipynb\) produces +[Basics](../../guide/basics.ipynb). -For the C++ API, use the namespace path: +This is the preferred approach because this way the links on +[tensorflow.org](https://www.tensorflow.org), +[GitHub](https://github.com/tensorflow/docs) and +[Colab](https://github.com/tensorflow/docs/tree/master/site/en/guide/bazics.ipynb) +all work. Also, the reader stays in the same site when they click a link. -* `tensorflow::Tensor` produces - [tensorflow::Tensor](https://www.tensorflow.org/api_docs/cc/class/tensorflow/tensor) +Note: You should include the file extension—such as `.ipynb` or `.md`—for +relative links. It will rendered on `tensorflow.org` without an extension. #### External links -For external links, including files on https://www.tensorflow.org -that are not in the `tensorflow/docs` repository, use standard Markdown links -with the full URI. +For links to files that are not in the current repository, use standard Markdown +links with the full URI. Prefer to link to the +[tensorflow.org](https://www.tensorflow.org) URI if it's available. To link to source code, use a link starting with https://www.github.com/tensorflow/tensorflow/blob/master/, followed by the file name starting at the GitHub root. -This URI naming scheme ensures that https://www.tensorflow.org can -forward the link to the branch of the code corresponding to the version of the -documentation you're viewing. +When linking off of [tensorflow.org](https://www.tensorflow.org), include a +`` on the Markdown link so that the "external link" symbol is shown. + +* `[GitHub](https://github.com/tensorflow/docs)` produces + [GitHub](https://github.com/tensorflow/docs) + +Do not include URI query parameters in the link: + +* Use: `https://www.tensorflow.org/guide/data` +* Not: `https://www.tensorflow.org/guide/data?hl=en` + + +#### Images + +The advice in the previous section is for links to pages. Images are handled +differently. + +Generally, you should not check in images, and instead add the +[TensorFlow-Docs team](https://github.com/tensorflow/docs) to your PR, and ask +them to host the images on [tensorflow.org](https://www.tensorflow.org). +This helps keep the size of your repository down. + +If you do submit images to your repository, note that some systems do not handle +relative paths to images. Prefer to use a full URL pointing to the image's +eventual location on [tensorflow.org](https://www.tensorflow.org). + +#### Links to API documentation + +API links are converted when the site is published. 
To link to a symbol's API +reference page, enclose the symbol path in backticks: + +* `tf.data.Dataset` produces + [`tf.data.Dataset`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) + +Full paths are slightly preferred except for long paths. Paths +can be abbreviated by dropping the leading path components. Partial paths will +be converted to links if: + +* There is at least one `.` in the path, and +* The partial path is unique within the project. + +API paths are linked **for every project** with a Python API published on +[tensorflow.org](https://www.tensorflow.org). You can easily link to multiple +subprojects from a single file by wrapping the API names with backticks. +For example: -Do not include URI query parameters in the link. +* `tf.metrics`, `tf_agents.metrics`, + `text.metrics` produces: `tf.metrics`, + `tf_agents.metrics`, `text.metrics`. -File paths use underscores for spaces, for example, `custom_layers.ipynb`. +For symbols with multiple path aliases there is a slight preference for the +path that matches the API-page on [tensorflow.org](https://www.tensorflow.org). +All aliases will redirect to the correct page. -Include the file extension in links to use on the site *and* GitHub, for example,
-\[Custom layers\]\(../tutorials/eager/custom_layers.ipynb\).
 
 ### Math in Markdown
 
diff --git a/site/en/community/contribute/sigs.md b/site/en/community/contribute/sigs.md
new file mode 100644
index 00000000000..b736ec5919a
--- /dev/null
+++ b/site/en/community/contribute/sigs.md
@@ -0,0 +1,97 @@
+# Contribute to TensorFlow Special Interest Groups (SIGs)
+
+The TensorFlow Special Interest Groups (TF SIGs) organize community contributions to key parts of the TensorFlow ecosystem. SIG leads and members work together to build and support important TensorFlow use cases.
+
+SIGs are led by members of the open source community, including industry collaborators and [Machine Learning Google Developer Experts](https://developers.google.com/community/experts) (ML GDEs). TensorFlow's success is due in large part to their hard work and contributions.
+
+We encourage you to join a SIG working on the area of TensorFlow's ecosystem you care most about. Not all SIGs will have the same level of energy, breadth of scope, or governance models — browse our [SIG charters](https://github.com/tensorflow/community/tree/master/sigs) to learn more. Stay connected with SIG leads and members on the [TensorFlow Forum](https://discuss.tensorflow.org/c/special-interest-groups/8), where you can subscribe to preferred [tags](https://discuss.tensorflow.org/tags) and learn more about the regular SIG meetings.
+
+## SIG Addons
+
+SIG Addons builds and maintains a repository of community contributions that conform to well-established API patterns, but implement new functionality not available in core TensorFlow.
+
+TensorFlow natively supports a large number of operators, layers, metrics, losses, optimizers, and more. However, in a fast-moving field like ML, there are many new developments that cannot be integrated into core TensorFlow (because their broad applicability is not yet clear, or they are mostly used by a smaller subset of the community). SIG Addons enables users to introduce new extensions to the TensorFlow ecosystem in a sustainable manner.
+
+SIG Addons on GitHub Contributing Discuss on the Forum
+
+## SIG Build
+
+SIG Build improves and extends the TensorFlow build process. SIG Build maintains a repository showcasing resources, guides, tools, and builds contributed by the community, for the community.
+
+SIG Build on GitHub Contributing Discuss on the Forum
+
+## SIG IO
+
+SIG IO maintains TensorFlow I/O, a collection of file systems and file formats that are not available in TensorFlow's built-in support.
+
+SIG IO on GitHub Contributing Discuss on the Forum
+
+## SIG JVM
+
+SIG JVM maintains the TF Java bindings to let users use the JVM for building, training and running machine learning models.
+
+Java and other JVM languages, such as Scala or Kotlin, are frequently used in small-to-large enterprises all over the world, which makes TensorFlow a strategic choice for adopting machine learning at a large scale.
+
+SIG JVM on GitHub Contributing Discuss on the Forum
+
+## SIG Models
+
+SIG Models focuses on enabling contributions to state-of-the-art model implementations in TensorFlow 2, and sharing best practices of using TensorFlow 2 for state-of-the-art research. Subgroups orient around different machine learning applications (Vision, NLP, etc.).
+
+SIG Models hosts discussions and collaborations around the [TensorFlow Model Garden](https://github.com/tensorflow/models) and [TensorFlow Hub](https://tfhub.dev). Learn how to contribute on GitHub below, or discuss [Research & Models](https://discuss.tensorflow.org/c/research-models/26) on the Forum.
+
+TensorFlow Model Garden on GitHub Contributing
+
+TensorFlow Hub on GitHub Contributing
+
+## SIG Micro
+
+SIG Micro discusses and shares updates on [TensorFlow Lite for Microcontrollers](https://www.tensorflow.org/lite/microcontrollers), a port of TensorFlow Lite designed to run machine learning models on DSPs, microcontrollers and other devices with limited memory.
+
+TensorFlow Lite Micro on GitHub Contributing Discuss on the Forum
+
+## SIG MLIR
+
+SIG MLIR maintains [MLIR](https://mlir.llvm.org/) dialects and utilities for TensorFlow, XLA and TF Lite, providing high performance compilers and optimization techniques that can be applied to TensorFlow graphs and code generation. Their overarching goal is to create a common intermediate representation (IR) that reduces the cost of bringing up new hardware and improves usability for existing TensorFlow users.
+
+SIG MLIR on GitHub Contributing Discuss on the Forum
+
+## SIG Networking
+
+SIG Networking maintains the TensorFlow Networking repository for platform-specific networking extensions to core TensorFlow and related utilities.
+
+SIG Networking on GitHub Discuss on the Forum
+
+## SIG Recommenders
+
+SIG Recommenders maintains a collection of projects related to large-scale recommendation systems built upon TensorFlow, contributed and maintained by the community. Those contributions are complementary to [TensorFlow Core](https://www.tensorflow.org/overview) and [TensorFlow Recommenders](https://www.tensorflow.org/recommenders).
+
+SIG Recommenders on GitHub Contributing Discuss on the Forum
+
+## SIG Rust
+
+SIG Rust maintains idiomatic Rust language bindings for TensorFlow.
+
+SIG Rust on GitHub Contributing Discuss on the Forum
+
+## SIG TensorBoard
+
+SIG TensorBoard facilitates discussion around [TensorBoard](https://www.tensorflow.org/tensorboard)—a suite of tools for inspecting, debugging and optimizing TensorFlow programs.
+
+TensorBoard on GitHub Contributing Discuss on the Forum
+
+## SIG TF.js
+
+SIG TF.js facilitates community-contributed components to [TensorFlow.js](https://www.tensorflow.org/js) and offers project support through the SIG.
+
+TensorFlow.js on GitHub Contributing Discuss on the Forum
+
+## SIG TFX-Addons
+
+SIG TFX-Addons accelerates the sharing of customizations and additions to meet the needs of production ML, expand the vision, and help drive new directions for [TensorFlow Extended (TFX)](https://www.tensorflow.org/tfx) and the ML community.
+
+SIG TFX-Addons on GitHub Contributing Discuss on the Forum
+
+## New SIGs
+
+Didn't find what you were looking for? If you believe there is a strong need for a new TensorFlow SIG, please read the [SIG playbook](https://www.tensorflow.org/community/sig_playbook) and follow the instructions on how to propose it to our contributor community.
diff --git a/site/en/community/mailing-lists.md b/site/en/community/mailing-lists.md
index a33f758cdbf..35bfb218ba1 100644
--- a/site/en/community/mailing-lists.md
+++ b/site/en/community/mailing-lists.md
@@ -2,11 +2,12 @@ As a community, we do much of our collaboration on public mailing lists.
Please note that if you're looking for help using TensorFlow, -[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) and +[TensorFlow Forum](https://discuss.tensorflow.org/), +[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow), and [GitHub issues](https://github.com/tensorflow/tensorflow/issues) are the best -initial places to look. +initial places to look. To receive a roundup of updates from the TensorFlow team each quarter, subscribe to the [TensorFlow newsletter](https://services.google.com/fb/forms/tensorflow/). -## General TensorFlow lists +## General TensorFlow lists and forums * [announce](https://groups.google.com/a/tensorflow.org/d/forum/announce) - Low-volume announcements of new releases. @@ -14,7 +15,7 @@ initial places to look. General community discussion around TensorFlow. * [developers](https://groups.google.com/a/tensorflow.org/d/forum/developers) - Discussion for developers contributing to TensorFlow. -* [documentation](https://groups.google.com/a/tensorflow.org/d/forum/docs) - +* [documentation](https://discuss.tensorflow.org/tag/docs) - Discussion for contributing to TensorFlow documentation. See [community translations](https://www.tensorflow.org/community/contribute/docs#community_translations) for language-specific docs lists. diff --git a/site/en/community/sig_playbook.md b/site/en/community/sig_playbook.md index 75e277d3d96..6ec7a554a5b 100644 --- a/site/en/community/sig_playbook.md +++ b/site/en/community/sig_playbook.md @@ -55,7 +55,7 @@ must demonstrate: application area) * Two or more contributors willing to act as group leads, existence of other contributors, and evidence of demand for the group -* Resources it will initially require (usually, mailing list and regular VC +* Resources it will initially require (usually, mailing list and regular video conference call.) Approval for the group will be given by a decision of the TF Community Team, diff --git a/site/en/datasets/README.md b/site/en/datasets/README.md deleted file mode 100644 index 422d0b5c9a3..00000000000 --- a/site/en/datasets/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Datasets - -These docs are available here: https://github.com/tensorflow/datasets/tree/master/docs diff --git a/site/en/federated/README.md b/site/en/federated/README.md deleted file mode 100644 index 518fde837cb..00000000000 --- a/site/en/federated/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Federated - -These docs are available here: -https://github.com/tensorflow/federated/tree/main/docs diff --git a/site/en/graphics/README.md b/site/en/graphics/README.md deleted file mode 100644 index 9182fae0c12..00000000000 --- a/site/en/graphics/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Graphics - -These docs are available here: https://github.com/tensorflow/graphics/tree/master/tensorflow_graphics/g3doc diff --git a/site/en/guide/_index.yaml b/site/en/guide/_index.yaml index f9d873dbf2a..e39dd37ead5 100644 --- a/site/en/guide/_index.yaml +++ b/site/en/guide/_index.yaml @@ -95,10 +95,11 @@ landing_page: items: - list: - description: > - - A suite of visualization tools to understand, debug, and optimize - TensorFlow programs. - path: /tensorboard + + A library to train, run and interpret decision forest models (e.g., Random Forests, + Gradient Boosted Trees) in TensorFlow. 
+ path: /decision_forests icon: icon_name: chevron_right foreground: theme @@ -113,10 +114,10 @@ landing_page: foreground: theme background: grey - description: > - - The TensorFlow Model Optimization Toolkit is a suite of tools for - optimizing ML models for deployment and execution. - path: /model_optimization + + A TFX serving system for ML models, designed for high-performance in + production environments. + path: /tfx/guide/serving icon: icon_name: chevron_right foreground: theme @@ -147,7 +148,24 @@ landing_page: icon_name: chevron_right foreground: theme background: grey + - description: > + + Extra functionality for TensorFlow, maintained by SIG Addons. + path: https://github.com/tensorflow/addons + icon: + icon_name: chevron_right + foreground: theme + background: grey - list: + - description: > + + A suite of visualization tools to understand, debug, and optimize + TensorFlow programs. + path: /tensorboard + icon: + icon_name: chevron_right + foreground: theme + background: grey - description: > A collection of datasets ready to use with TensorFlow. @@ -157,10 +175,10 @@ landing_page: foreground: theme background: grey - description: > - - A TFX serving system for ML models, designed for high-performance in - production environments. - path: /tfx/guide/serving + + The TensorFlow Model Optimization Toolkit is a suite of tools for + optimizing ML models for deployment and execution. + path: /model_optimization icon: icon_name: chevron_right foreground: theme @@ -192,14 +210,6 @@ landing_page: icon_name: chevron_right foreground: theme background: grey - - description: > - - Extra functionality for TensorFlow, maintained by SIG Addons. - path: https://github.com/tensorflow/addons - icon: - icon_name: chevron_right - foreground: theme - background: grey - description: > Dataset, streaming, and file system extensions, maintained by SIG IO. 
diff --git a/site/en/guide/_toc.yaml b/site/en/guide/_toc.yaml index 62410bfcde6..92e5d6a80c3 100644 --- a/site/en/guide/_toc.yaml +++ b/site/en/guide/_toc.yaml @@ -3,41 +3,73 @@ toc: path: /guide/ - heading: "TensorFlow basics" -- title: "Eager execution" - path: /guide/eager -- title: "Tensor" +- title: "Overview" + path: /guide/basics +- title: "Tensors" path: /guide/tensor -- title: "Variable" +- title: "Variables" path: /guide/variable - title: "Automatic differentiation" path: /guide/autodiff -- title: "Intro to graphs and functions" +- title: "Graphs and functions" path: /guide/intro_to_graphs -- title: "Intro to modules, layers, and models" +- title: "Modules, layers, and models" path: /guide/intro_to_modules - title: "Training loops" path: /guide/basic_training_loops + +- heading: "Keras" +- title: "Overview" + path: /guide/keras +- include: /guide/keras/_toc.yaml + +- heading: "Build with Core" + status: new +- title: "Overview" + path: /guide/core/index +- title: "Quickstart for Core" + path: /guide/core/quickstart_core +- title: "Logistic regression" + path: /guide/core/logistic_regression_core +- title: "Multilayer perceptrons" + path: /guide/core/mlp_core +- title: "Matrix approximation" + path: /guide/core/matrix_core +- title: "Custom optimizers" + path: /guide/core/optimizers_core +- title: "DTensor with Core APIs" + path: /guide/core/distribution + status: experimental + +- heading: "TensorFlow in depth" +- title: "Tensor slicing" + path: /guide/tensor_slicing - title: "Advanced autodiff" path: /guide/advanced_autodiff - title: "Ragged tensor" path: /guide/ragged_tensor - title: "Sparse tensor" path: /guide/sparse_tensor +- title: "Random number generation" + path: /guide/random_numbers - title: "NumPy API" + status: experimental path: /guide/tf_numpy -- title: "Tensor slicing" - path: /guide/tensor_slicing +- title: "NumPy API Type Promotion" + status: nightly + path: /guide/tf_numpy_type_promotion +- title: "DTensor concepts" + path: /guide/dtensor_overview + status: experimental - title: "Thinking in TensorFlow 2" path: /guide/effective_tf2 -- heading: "Keras" -- include: /guide/keras/_toc.yaml - - heading: "Customization" - title: "Create an op" path: /guide/create_op -- title: "Random number generation" - path: /guide/random_numbers +- title: "Extension types" + path: /guide/extension_type + status: experimental - heading: "Data input pipelines" - title: "tf.data" @@ -47,11 +79,14 @@ toc: - title: "Analyze pipeline performance" path: /guide/data_performance_analysis -- heading: "Save a model" +- heading: "Import and export" - title: "Checkpoint" path: /guide/checkpoint - title: "SavedModel" path: /guide/saved_model +- title: "Import a JAX model using JAX2TF" + status: new + path: /guide/jax2tf - heading: "Accelerators" - title: "Distributed training" @@ -73,7 +108,19 @@ toc: - title: "Mixed precision" path: /guide/mixed_precision +- heading: "Model Garden" + status: new +- title: "Overview" + path: /tfmodels +- title: "Training with Orbit" + path: /tfmodels/orbit +- title: "TFModels - NLP" + path: /tfmodels/nlp + status: external +- include: /tfmodels/vision/_toc.yaml + - heading: "Estimators" + status: deprecated - title: "Estimator overview" path: /guide/estimator diff --git a/site/en/guide/advanced_autodiff.ipynb b/site/en/guide/advanced_autodiff.ipynb index 7da53d8ee48..e04b9db4d77 100644 --- a/site/en/guide/advanced_autodiff.ipynb +++ b/site/en/guide/advanced_autodiff.ipynb @@ -1101,7 +1101,7 @@ "id": "M_x7ih5sarvG" }, "source": [ - "In this case, 
`batch_jacobian` still runs and returns _something_ with the expected shape, but it's contents have an unclear meaning:" + "In this case, `batch_jacobian` still runs and returns _something_ with the expected shape, but its contents have an unclear meaning:" ] }, { diff --git a/site/en/guide/autodiff.ipynb b/site/en/guide/autodiff.ipynb index c9c57cd4e69..237a224569b 100644 --- a/site/en/guide/autodiff.ipynb +++ b/site/en/guide/autodiff.ipynb @@ -75,7 +75,7 @@ "[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training\n", "neural networks.\n", "\n", - "In this guide, you will explore ways to compute gradients with TensorFlow, especially in [eager execution](eager.ipynb)." + "In this guide, you will explore ways to compute gradients with TensorFlow, especially in eager execution." ] }, { @@ -746,9 +746,9 @@ "id": "egypBxISAHhx" }, "source": [ - "## Getting a gradient of `None`\n", + "## Cases where `gradient` returns `None`\n", "\n", - "When a target is not connected to a source you will get a gradient of `None`.\n" + "When a target is not connected to a source, `gradient` will return `None`.\n" ] }, { diff --git a/site/en/guide/basic_training_loops.ipynb b/site/en/guide/basic_training_loops.ipynb index f2c2a61afa6..a1558b1903e 100644 --- a/site/en/guide/basic_training_loops.ipynb +++ b/site/en/guide/basic_training_loops.ipynb @@ -70,7 +70,7 @@ "source": [ "In the previous guides, you have learned about [tensors](./tensor.ipynb), [variables](./variable.ipynb), [gradient tape](autodiff.ipynb), and [modules](./intro_to_modules.ipynb). In this guide, you will fit these all together to train models.\n", "\n", - "TensorFlow also includes the [tf.Keras API](keras/overview.ipynb), a high-level neural network API that provides useful abstractions to reduce boilerplate. However, in this guide, you will use basic classes." + "TensorFlow also includes the [tf.Keras API](https://www.tensorflow.org/guide/keras/overview), a high-level neural network API that provides useful abstractions to reduce boilerplate. However, in this guide, you will use basic classes." ] }, { @@ -90,7 +90,11 @@ }, "outputs": [], "source": [ - "import tensorflow as tf" + "import tensorflow as tf\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "colors = plt.rcParams['axes.prop_cycle'].by_key()['color']" ] }, { @@ -142,16 +146,20 @@ "TRUE_W = 3.0\n", "TRUE_B = 2.0\n", "\n", - "NUM_EXAMPLES = 1000\n", + "NUM_EXAMPLES = 201\n", "\n", "# A vector of random x values\n", - "x = tf.random.normal(shape=[NUM_EXAMPLES])\n", + "x = tf.linspace(-2,2, NUM_EXAMPLES)\n", + "x = tf.cast(x, tf.float32)\n", + "\n", + "def f(x):\n", + " return x * TRUE_W + TRUE_B\n", "\n", "# Generate some noise\n", "noise = tf.random.normal(shape=[NUM_EXAMPLES])\n", "\n", "# Calculate y\n", - "y = x * TRUE_W + TRUE_B + noise" + "y = f(x) + noise" ] }, { @@ -163,9 +171,7 @@ "outputs": [], "source": [ "# Plot all the data\n", - "import matplotlib.pyplot as plt\n", - "\n", - "plt.scatter(x, y, c=\"b\")\n", + "plt.plot(x, y, '.')\n", "plt.show()" ] }, @@ -227,7 +233,7 @@ "id": "rdpN_3ssG9D5" }, "source": [ - "The initial variables are set here in a fixed way, but Keras comes with any of a number of [initalizers](https://www.tensorflow.org/api_docs/python/tf/keras/initializers) you could use, with or without the rest of Keras." 
+ "The initial variables are set here in a fixed way, but Keras comes with any of a number of [initializers](https://www.tensorflow.org/api_docs/python/tf/keras/initializers) you could use, with or without the rest of Keras." ] }, { @@ -271,8 +277,10 @@ }, "outputs": [], "source": [ - "plt.scatter(x, y, c=\"b\")\n", - "plt.scatter(x, model(x), c=\"r\")\n", + "plt.plot(x, y, '.', label=\"Data\")\n", + "plt.plot(x, f(x), label=\"Ground truth\")\n", + "plt.plot(x, model(x), label=\"Predictions\")\n", + "plt.legend()\n", "plt.show()\n", "\n", "print(\"Current loss: %1.6f\" % loss(y, model(x)).numpy())" @@ -341,10 +349,15 @@ "model = MyModel()\n", "\n", "# Collect the history of W-values and b-values to plot later\n", - "Ws, bs = [], []\n", + "weights = []\n", + "biases = []\n", "epochs = range(10)\n", "\n", "# Define a training loop\n", + "def report(model, loss):\n", + " return f\"W = {model.w.numpy():1.2f}, b = {model.b.numpy():1.2f}, loss={loss:2.5f}\"\n", + "\n", + "\n", "def training_loop(model, x, y):\n", "\n", " for epoch in epochs:\n", @@ -352,12 +365,21 @@ " train(model, x, y, learning_rate=0.1)\n", "\n", " # Track this before I update\n", - " Ws.append(model.w.numpy())\n", - " bs.append(model.b.numpy())\n", + " weights.append(model.w.numpy())\n", + " biases.append(model.b.numpy())\n", " current_loss = loss(y, model(x))\n", "\n", - " print(\"Epoch %2d: W=%1.2f b=%1.2f, loss=%2.5f\" %\n", - " (epoch, Ws[-1], bs[-1], current_loss))\n" + " print(f\"Epoch {epoch:2d}:\")\n", + " print(\" \", report(model, current_loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8dKKLU4KkQEq" + }, + "source": [ + "Do the training" ] }, { @@ -368,21 +390,50 @@ }, "outputs": [], "source": [ - "print(\"Starting: W=%1.2f b=%1.2f, loss=%2.5f\" %\n", - " (model.w, model.b, loss(y, model(x))))\n", + "current_loss = loss(y, model(x))\n", "\n", - "# Do the training\n", - "training_loop(model, x, y)\n", + "print(f\"Starting:\")\n", + "print(\" \", report(model, current_loss))\n", "\n", - "# Plot it\n", - "plt.plot(epochs, Ws, \"r\",\n", - " epochs, bs, \"b\")\n", + "training_loop(model, x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JPJgimg8kSA4" + }, + "source": [ + "Plot the evolution of the weights over time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ND1fQw8sbTNr" + }, + "outputs": [], + "source": [ + "plt.plot(epochs, weights, label='Weights', color=colors[0])\n", + "plt.plot(epochs, [TRUE_W] * len(epochs), '--',\n", + " label = \"True weight\", color=colors[0])\n", "\n", - "plt.plot([TRUE_W] * len(epochs), \"r--\",\n", - " [TRUE_B] * len(epochs), \"b--\")\n", + "plt.plot(epochs, biases, label='bias', color=colors[1])\n", + "plt.plot(epochs, [TRUE_B] * len(epochs), \"--\",\n", + " label=\"True bias\", color=colors[1])\n", "\n", - "plt.legend([\"W\", \"b\", \"True W\", \"True b\"])\n", - "plt.show()\n" + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zhlwj1ojkcUP" + }, + "source": [ + "Visualize how the trained model performs" ] }, { @@ -393,9 +444,10 @@ }, "outputs": [], "source": [ - "# Visualize how the trained model performs\n", - "plt.scatter(x, y, c=\"b\")\n", - "plt.scatter(x, model(x), c=\"r\")\n", + "plt.plot(x, y, '.', label=\"Data\")\n", + "plt.plot(x, f(x), label=\"Ground truth\")\n", + "plt.plot(x, model(x), label=\"Predictions\")\n", + "plt.legend()\n", "plt.show()\n", "\n", "print(\"Current loss: %1.6f\" % loss(model(x), y).numpy())" @@ -523,7 +575,7 @@ 
"\n", "This is, however, an extremely simple problem. For a more practical introduction, see [Custom training walkthrough](../tutorials/customization/custom_training_walkthrough.ipynb).\n", "\n", - "For more on using built-in Keras training loops, see [this guide](keras/train_and_evaluate.ipynb). For more on training loops and Keras, see [this guide](keras/writing_a_training_loop_from_scratch.ipynb). For writing custom distributed training loops, see [this guide](distributed_training.ipynb#using_tfdistributestrategy_with_basic_training_loops_loops)." + "For more on using built-in Keras training loops, see [this guide](https://www.tensorflow.org/guide/keras/train_and_evaluate). For more on training loops and Keras, see [this guide](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch). For writing custom distributed training loops, see [this guide](distributed_training.ipynb#using_tfdistributestrategy_with_basic_training_loops_loops)." ] } ], @@ -531,8 +583,7 @@ "colab": { "collapsed_sections": [ "5rmpybwysXGV", - "iKD__8kFCKNt", - "vPnIVuaSJwWz" + "iKD__8kFCKNt" ], "name": "basic_training_loops.ipynb", "toc_visible": true diff --git a/site/en/guide/basics.ipynb b/site/en/guide/basics.ipynb new file mode 100644 index 00000000000..5457f162a0e --- /dev/null +++ b/site/en/guide/basics.ipynb @@ -0,0 +1,968 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5rmpybwysXGV" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "m8y3rGtQsYP2" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hrXv0rU9sIma" + }, + "source": [ + "# TensorFlow basics" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7S0BwJ_8sLu7" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iJyZUDbzBTIG" + }, + "source": [ + "This guide provides a quick overview of _TensorFlow basics_. Each section of this doc is an overview of a larger topic—you can find links to full guides at the end of each section.\n", + "\n", + "TensorFlow is an end-to-end platform for machine learning. It supports the following:\n", + "\n", + "* Multidimensional-array based numeric computation (similar to NumPy.)\n", + "* GPU and distributed processing\n", + "* Automatic differentiation\n", + "* Model construction, training, and export\n", + "* And more" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gvLegMMvBZYg" + }, + "source": [ + "## Tensors\n", + "\n", + "TensorFlow operates on multidimensional arrays or _tensors_ represented as `tf.Tensor` objects. Here is a two-dimensional tensor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6ZqX5RnbBS1f" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "x = tf.constant([[1., 2., 3.],\n", + " [4., 5., 6.]])\n", + "\n", + "print(x)\n", + "print(x.shape)\n", + "print(x.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k-AOMqevQGN4" + }, + "source": [ + "The most important attributes of a `tf.Tensor` are its `shape` and `dtype`:\n", + "\n", + "* `Tensor.shape`: tells you the size of the tensor along each of its axes.\n", + "* `Tensor.dtype`: tells you the type of all the elements in the tensor." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bUkKeNWZCIJO" + }, + "source": [ + "TensorFlow implements standard mathematical operations on tensors, as well as many operations specialized for machine learning.\n", + "\n", + "For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BM7xXNDsBfN5" + }, + "outputs": [], + "source": [ + "x + x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZLGqscTxB61v" + }, + "outputs": [], + "source": [ + "5 * x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ImJHd8VfnWq" + }, + "outputs": [], + "source": [ + "x @ tf.transpose(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U9JZD6TYCZWu" + }, + "outputs": [], + "source": [ + "tf.concat([x, x, x], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "seGBLeD9P_PI" + }, + "outputs": [], + "source": [ + "tf.nn.softmax(x, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YZNZRv1ECjf8" + }, + "outputs": [], + "source": [ + "tf.reduce_sum(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TNHnIjOVLJfA" + }, + "source": [ + "Note: Typically, anywhere a TensorFlow function expects a `Tensor` as input, the function will also accept anything that can be converted to a `Tensor` using `tf.convert_to_tensor`. See below for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i_XKgjDsL4GE" + }, + "outputs": [], + "source": [ + "tf.convert_to_tensor([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wTBt-JUqLJDJ" + }, + "outputs": [], + "source": [ + "tf.reduce_sum([1,2,3])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8-mi5031DVxz" + }, + "source": [ + "Running large calculations on CPU can be slow. 
When properly configured, TensorFlow can use accelerator hardware like GPUs to execute operations very quickly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m97Gv5H6Dz0G" + }, + "outputs": [], + "source": [ + "if tf.config.list_physical_devices('GPU'):\n", + " print(\"TensorFlow **IS** using the GPU\")\n", + "else:\n", + " print(\"TensorFlow **IS NOT** using the GPU\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ln2FkLOqMX92" + }, + "source": [ + "Refer to the [Tensor guide](tensor.ipynb) for details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oVbomvMyEIVF" + }, + "source": [ + "## Variables\n", + "\n", + "Normal `tf.Tensor` objects are immutable. To store model weights (or other mutable state) in TensorFlow use a `tf.Variable`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SO8_bP4UEzxS" + }, + "outputs": [], + "source": [ + "var = tf.Variable([0.0, 0.0, 0.0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aDLYFvu5FAFa" + }, + "outputs": [], + "source": [ + "var.assign([1, 2, 3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9EpiOmxXFDSS" + }, + "outputs": [], + "source": [ + "var.assign_add([1, 1, 1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tlvTpi1CMedC" + }, + "source": [ + "Refer to the [Variables guide](variable.ipynb) for details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rG1Dhv2QFkV3" + }, + "source": [ + "## Automatic differentiation\n", + "\n", + "_Gradient descent_ and related algorithms are a cornerstone of modern machine learning.\n", + "\n", + "To enable this, TensorFlow implements automatic differentiation (autodiff), which uses calculus to compute gradients. Typically you'll use this to calculate the gradient of a model's _error_ or _loss_ with respect to its weights." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cYKOi-z4GY9Y" + }, + "outputs": [], + "source": [ + "x = tf.Variable(1.0)\n", + "\n", + "def f(x):\n", + " y = x**2 + 2*x - 5\n", + " return y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IQz99cxMGoF_" + }, + "outputs": [], + "source": [ + "f(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ozLLop0cHeYl" + }, + "source": [ + "At `x = 1.0`, `y = f(x) = (1**2 + 2*1 - 5) = -2`.\n", + "\n", + "The derivative of `y` is `y' = f'(x) = (2*x + 2) = 4`. TensorFlow can calculate this automatically:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N02NfWpHGvw8" + }, + "outputs": [], + "source": [ + "with tf.GradientTape() as tape:\n", + " y = f(x)\n", + "\n", + "g_x = tape.gradient(y, x) # g(x) = dy/dx\n", + "\n", + "g_x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s-DVYJfcIRPd" + }, + "source": [ + "This simplified example only takes the derivative with respect to a single scalar (`x`), but TensorFlow can compute the gradient with respect to any number of non-scalar tensors simultaneously." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ECK3I9bUMk_r" + }, + "source": [ + "Refer to the [Autodiff guide](autodiff.ipynb) for details." 
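+    "\n",
+    "As a quick illustration of that last point, here is a minimal sketch (not part of the original guide; the variable names are only illustrative) of taking gradients with respect to several tensors at once:\n",
+    "\n",
+    "```python\n",
+    "w = tf.Variable(tf.random.normal((3, 2)), name='w')\n",
+    "b = tf.Variable(tf.zeros(2), name='b')\n",
+    "x = tf.constant([[1., 2., 3.]])\n",
+    "\n",
+    "with tf.GradientTape() as tape:\n",
+    "  y = x @ w + b\n",
+    "  loss = tf.reduce_mean(y**2)\n",
+    "\n",
+    "# Gradients come back in the same structure as the sources passed in.\n",
+    "dl_dw, dl_db = tape.gradient(loss, [w, b])\n",
+    "print(dl_dw.shape, dl_db.shape)  # (3, 2) and (2,)\n",
+    "```"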
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VglUM4M3KhNz" + }, + "source": [ + "## Graphs and tf.function\n", + "\n", + "While you can use TensorFlow interactively like any Python library, TensorFlow also provides tools for:\n", + "\n", + "* **Performance optimization**: to speed up training and inference.\n", + "* **Export**: so you can save your model when it's done training.\n", + "\n", + "These require that you use `tf.function` to separate your pure-TensorFlow code from Python." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VitACyZWKJD_" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def my_func(x):\n", + " print('Tracing.\\n')\n", + " return tf.reduce_sum(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fBYDh-huNUBZ" + }, + "source": [ + "The first time you run the `tf.function`, although it executes in Python, it captures a complete, optimized graph representing the TensorFlow computations done within the function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vkOFSEkoM1bd" + }, + "outputs": [], + "source": [ + "x = tf.constant([1, 2, 3])\n", + "my_func(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a3aWzt-rNsBa" + }, + "source": [ + "On subsequent calls TensorFlow only executes the optimized graph, skipping any non-TensorFlow steps. Below, note that `my_func` doesn't print _tracing_ since `print` is a Python function, not a TensorFlow function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "23dMHWwwNIoa" + }, + "outputs": [], + "source": [ + "x = tf.constant([10, 9, 8])\n", + "my_func(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nSeTti6zki0n" + }, + "source": [ + "A graph may not be reusable for inputs with a different _signature_ (`shape` and `dtype`), so a new graph is generated instead:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OWffqyhqlVPf" + }, + "outputs": [], + "source": [ + "x = tf.constant([10.0, 9.1, 8.2], dtype=tf.float32)\n", + "my_func(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UWknAA_zNTOa" + }, + "source": [ + "These captured graphs provide two benefits:\n", + "\n", + "* In many cases they provide a significant speedup in execution (though not this trivial example).\n", + "* You can export these graphs, using `tf.saved_model`, to run on other systems like a [server](https://www.tensorflow.org/tfx/serving/docker) or a [mobile device](https://www.tensorflow.org/lite/guide), no Python installation required." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hLUJ6f2eMsA8" + }, + "source": [ + "Refer to [Intro to graphs](intro_to_graphs.ipynb) for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t_36xPDPPBqp" + }, + "source": [ + "## Modules, layers, and models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oDaT7kCpUgnJ" + }, + "source": [ + "`tf.Module` is a class for managing your `tf.Variable` objects, and the `tf.function` objects that operate on them. The `tf.Module` class is necessary to support two significant features:\n", + "\n", + "1. You can save and restore the values of your variables using `tf.train.Checkpoint`. This is useful during training as it is quick to save and restore a model's state.\n", + "2. 
You can import and export the `tf.Variable` values _and_ the `tf.function` graphs using `tf.saved_model`. This allows you to run your model independently of the Python program that created it.\n", + "\n", + "Here is a complete example exporting a simple `tf.Module` object:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1MqEcZOqPBDV" + }, + "outputs": [], + "source": [ + "class MyModule(tf.Module):\n", + " def __init__(self, value):\n", + " self.weight = tf.Variable(value)\n", + "\n", + " @tf.function\n", + " def multiply(self, x):\n", + " return x * self.weight" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "la2G82HfVfU0" + }, + "outputs": [], + "source": [ + "mod = MyModule(3)\n", + "mod.multiply(tf.constant([1, 2, 3]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GaSJX7zQXCm4" + }, + "source": [ + "Save the `Module`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1MlfbEMjVzG4" + }, + "outputs": [], + "source": [ + "save_path = './saved'\n", + "tf.saved_model.save(mod, save_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LgfoftD4XGJW" + }, + "source": [ + "The resulting SavedModel is independent of the code that created it. You can load a SavedModel from Python, other language bindings, or [TensorFlow Serving](https://www.tensorflow.org/tfx/serving/docker). You can also convert it to run with [TensorFlow Lite](https://www.tensorflow.org/lite/guide) or [TensorFlow JS](https://www.tensorflow.org/js/guide)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pWuLOIKBWZYG" + }, + "outputs": [], + "source": [ + "reloaded = tf.saved_model.load(save_path)\n", + "reloaded.multiply(tf.constant([1, 2, 3]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nxU6P1RGwHyC" + }, + "source": [ + "The `tf.keras.layers.Layer` and `tf.keras.Model` classes build on `tf.Module` providing additional functionality and convenience methods for building, training, and saving models. Some of these are demonstrated in the next section." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tQzt3yaWMzLf" + }, + "source": [ + "Refer to [Intro to modules](intro_to_modules.ipynb) for details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rk1IEG5aav7X" + }, + "source": [ + "## Training loops\n", + "\n", + "Now put this all together to build a basic model and train it from scratch.\n", + "\n", + "First, create some example data. 
This generates a cloud of points that loosely follows a quadratic curve:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VcuFr7KPRPzn" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sXN9E_xf-GiP" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-2, 2, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "\n", + "def f(x):\n", + " y = x**2 + 2*x - 5\n", + " return y\n", + "\n", + "y = f(x) + tf.random.normal(shape=[201])\n", + "\n", + "plt.plot(x.numpy(), y.numpy(), '.', label='Data')\n", + "plt.plot(x, f(x), label='Ground truth')\n", + "plt.legend();" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "De5LldboSWcW" + }, + "source": [ + "Create a quadratic model with randomly initialized weights and a bias:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Pypd0GB4SRhf" + }, + "outputs": [], + "source": [ + "class Model(tf.Module):\n", + "\n", + " def __init__(self):\n", + " # Randomly generate weight and bias terms\n", + " rand_init = tf.random.uniform(shape=[3], minval=0., maxval=5., seed=22)\n", + " # Initialize model parameters\n", + " self.w_q = tf.Variable(rand_init[0])\n", + " self.w_l = tf.Variable(rand_init[1])\n", + " self.b = tf.Variable(rand_init[2])\n", + " \n", + " @tf.function\n", + " def __call__(self, x):\n", + " # Quadratic Model : quadratic_weight * x^2 + linear_weight * x + bias\n", + " return self.w_q * (x**2) + self.w_l * x + self.b" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "36o7VjaesScg" + }, + "source": [ + "First, observe your model's performance before training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GkwToC5BWV1c" + }, + "outputs": [], + "source": [ + "quad_model = Model()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ReWhH40wTY5F" + }, + "outputs": [], + "source": [ + "def plot_preds(x, y, f, model, title):\n", + " plt.figure()\n", + " plt.plot(x, y, '.', label='Data')\n", + " plt.plot(x, f(x), label='Ground truth')\n", + " plt.plot(x, model(x), label='Predictions')\n", + " plt.title(title)\n", + " plt.legend()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y0JtXQat-nlk" + }, + "outputs": [], + "source": [ + "plot_preds(x, y, f, quad_model, 'Before training')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hLzwD0-ascGf" + }, + "source": [ + "Now, define a loss for your model:\n", + "\n", + "Given that this model is intended to predict continuous values, the mean squared error (MSE) is a good choice for the loss function. Given a vector of predictions, $\\hat{y}$, and a vector of true targets, $y$, the MSE is defined as the mean of the squared differences between the predicted values and the ground truth.\n", + "\n", + "$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eCtJ1uuCseZd" + }, + "outputs": [], + "source": [ + "def mse_loss(y_pred, y):\n", + " return tf.reduce_mean(tf.square(y_pred - y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7EWyDu3zot2w" + }, + "source": [ + "Write a basic training loop for the model. 
The loop will make use of the MSE loss function and its gradients with respect to the model's variables in order to iteratively update the model's parameters. Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "8kX_-zily2Ia"
+   },
+   "outputs": [],
+   "source": [
+    "batch_size = 32\n",
+    "dataset = tf.data.Dataset.from_tensor_slices((x, y))\n",
+    "dataset = dataset.shuffle(buffer_size=x.shape[0]).batch(batch_size)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "nOaES5gyTDtG"
+   },
+   "outputs": [],
+   "source": [
+    "# Set training parameters\n",
+    "epochs = 100\n",
+    "learning_rate = 0.01\n",
+    "losses = []\n",
+    "\n",
+    "# Format training loop\n",
+    "for epoch in range(epochs):\n",
+    "  for x_batch, y_batch in dataset:\n",
+    "    with tf.GradientTape() as tape:\n",
+    "      batch_loss = mse_loss(quad_model(x_batch), y_batch)\n",
+    "    # Update parameters with respect to the gradient calculations\n",
+    "    grads = tape.gradient(batch_loss, quad_model.variables)\n",
+    "    for g, v in zip(grads, quad_model.variables):\n",
+    "      v.assign_sub(learning_rate*g)\n",
+    "  # Keep track of model loss per epoch\n",
+    "  loss = mse_loss(quad_model(x), y)\n",
+    "  losses.append(loss)\n",
+    "  if epoch % 10 == 0:\n",
+    "    print(f'Mean squared error for step {epoch}: {loss.numpy():0.3f}')\n",
+    "\n",
+    "# Plot model results\n",
+    "print(\"\\n\")\n",
+    "plt.plot(range(epochs), losses)\n",
+    "plt.xlabel(\"Epoch\")\n",
+    "plt.ylabel(\"Mean Squared Error (MSE)\")\n",
+    "plt.title('MSE loss vs training iterations');"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "dW5B2TTRsvxE"
+   },
+   "source": [
+    "Now, observe your model's performance after training:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Qcvzyg3eYLh8"
+   },
+   "outputs": [],
+   "source": [
+    "plot_preds(x, y, f, quad_model, 'After training')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "hbtmFJIXb6qm"
+   },
+   "source": [
+    "That's working, but remember that implementations of common training utilities are available in the `tf.keras` module. So, consider using those before writing your own. To start with, the `Model.compile` and `Model.fit` methods implement a training loop for you:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "cjx23MiztFmT"
+   },
+   "source": [
+    "Begin by creating a Sequential model in Keras using `tf.keras.Sequential`. One of the simplest Keras layers is the dense layer, which can be instantiated with `tf.keras.layers.Dense`. The dense layer is able to learn multidimensional linear relationships of the form $\mathrm{Y} = \mathrm{W}\mathrm{X} + \vec{b}$. In order to learn a nonlinear equation of the form $w_1x^2 + w_2x + b$, the dense layer's input should be a data matrix with $x^2$ and $x$ as features. The lambda layer, `tf.keras.layers.Lambda`, can be used to perform this stacking transformation."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5rt8HP2TZhEM" + }, + "outputs": [], + "source": [ + "new_model = tf.keras.Sequential([\n", + " tf.keras.layers.Lambda(lambda x: tf.stack([x, x**2], axis=1)),\n", + " tf.keras.layers.Dense(units=1, kernel_initializer=tf.random.normal)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "73kCo1BtP3rQ" + }, + "outputs": [], + "source": [ + "new_model.compile(\n", + " loss=tf.keras.losses.MSE,\n", + " optimizer=tf.keras.optimizers.SGD(learning_rate=0.01))\n", + "\n", + "history = new_model.fit(x, y,\n", + " epochs=100,\n", + " batch_size=32,\n", + " verbose=0)\n", + "\n", + "new_model.save('./my_new_model.keras')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u3q5d1SzvzTq" + }, + "source": [ + "Observe your Keras model's performance after training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mo7zRV7XZjv7" + }, + "outputs": [], + "source": [ + "plt.plot(history.history['loss'])\n", + "plt.xlabel('Epoch')\n", + "plt.ylim([0, max(plt.ylim())])\n", + "plt.ylabel('Loss [Mean Squared Error]')\n", + "plt.title('Keras training progress');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bB44a9YsvnfK" + }, + "outputs": [], + "source": [ + "plot_preds(x, y, f, new_model, 'After Training: Keras')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ng-BY_eGS0bn" + }, + "source": [ + "Refer to [Basic training loops](basic_training_loops.ipynb) and the [Keras guide](https://www.tensorflow.org/guide/keras) for more details." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "basics.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/checkpoint.ipynb b/site/en/guide/checkpoint.ipynb index 59fae1257af..fb3b45437f7 100644 --- a/site/en/guide/checkpoint.ipynb +++ b/site/en/guide/checkpoint.ipynb @@ -139,7 +139,7 @@ "## Saving from `tf.keras` training APIs\n", "\n", "See the [`tf.keras` guide on saving and\n", - "restoring](./keras/overview.ipynb#save_and_restore).\n", + "restoring](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", "\n", "`tf.keras.Model.save_weights` saves a TensorFlow checkpoint. " ] @@ -423,7 +423,7 @@ "\n", "The optimizer is in red, regular variables are in blue, and the optimizer slot variables are in orange. The other nodes—for example, representing the `tf.train.Checkpoint`—are in black.\n", "\n", - "Slot variables are part of the optimizer's state, but are created for a specific variable. For example the `'m'` edges above correspond to momentum, which the Adam optimizer tracks for each variable. Slot variables are only saved in a checkpoint if the variable and the optimizer would both be saved, thus the dashed edges." + "Slot variables are part of the optimizer's state, but are created for a specific variable. For example, the `'m'` edges above correspond to momentum, which the Adam optimizer tracks for each variable. Slot variables are only saved in a checkpoint if the variable and the optimizer would both be saved, thus the dashed edges." 
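+    "\n",
+    "As a minimal sketch of how such a graph comes about (the object names here are illustrative, not from the code above), attaching both a model and its optimizer to one checkpoint is what makes the slot variables reachable:\n",
+    "\n",
+    "```python\n",
+    "net = tf.keras.layers.Dense(5)\n",
+    "opt = tf.keras.optimizers.Adam(0.1)\n",
+    "ckpt = tf.train.Checkpoint(model=net, optimizer=opt)\n",
+    "\n",
+    "# Once the optimizer has applied gradients, its slot variables (the 'm'\n",
+    "# edges above) exist and are written too, because both the variables and\n",
+    "# the optimizer are reachable from this checkpoint.\n",
+    "save_path = ckpt.save('./tf_ckpts/example')\n",
+    "```"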
] }, { @@ -491,11 +491,11 @@ "id": "KCcmJ-2j9RUP" }, "source": [ - "### Delayed restorations\n", + "### Deferred restorations\n", "\n", - "`Layer` objects in TensorFlow may delay the creation of variables to their first call, when input shapes are available. For example the shape of a `Dense` layer's kernel depends on both the layer's input and output shapes, and so the output shape required as a constructor argument is not enough information to create the variable on its own. Since calling a `Layer` also reads the variable's value, a restore must happen between the variable's creation and its first use.\n", + "`Layer` objects in TensorFlow may defer the creation of variables to their first call, when input shapes are available. For example, the shape of a `Dense` layer's kernel depends on both the layer's input and output shapes, and so the output shape required as a constructor argument is not enough information to create the variable on its own. Since calling a `Layer` also reads the variable's value, a restore must happen between the variable's creation and its first use.\n", "\n", - "To support this idiom, `tf.train.Checkpoint` queues restores which don't yet have a matching variable." + "To support this idiom, `tf.train.Checkpoint` defers restores which don't yet have a matching variable." ] }, { @@ -506,10 +506,10 @@ }, "outputs": [], "source": [ - "delayed_restore = tf.Variable(tf.zeros([1, 5]))\n", - "print(delayed_restore.numpy()) # Not restored; still zeros\n", - "fake_layer.kernel = delayed_restore\n", - "print(delayed_restore.numpy()) # Restored" + "deferred_restore = tf.Variable(tf.zeros([1, 5]))\n", + "print(deferred_restore.numpy()) # Not restored; still zeros\n", + "fake_layer.kernel = deferred_restore\n", + "print(deferred_restore.numpy()) # Restored" ] }, { @@ -589,7 +589,9 @@ "id": "5fxk_BnZ4W1b" }, "source": [ - "### List and dictionary tracking\n", + "### Object tracking\n", + "\n", + "Checkpoints save and restore the values of `tf.Variable` objects by \"tracking\" any variable or trackable object set in one of its attributes. When executing a save, variables are gathered recursively from all of the reachable tracked objects.\n", "\n", "As with direct attribute assignments like `self.l1 = tf.keras.layers.Dense(5)`, assigning lists and dictionaries to attributes will track their contents." ] @@ -647,7 +649,22 @@ "id": "OxCIf2J6JyQ8" }, "source": [ - "The same tracking is automatically applied to subclasses of `tf.keras.Model`, and may be used for example to track lists of layers." + "Trackable objects include `tf.train.Checkpoint`, `tf.Module` and its subclasses (e.g. `keras.layers.Layer` and `keras.Model`), and recognized Python containers:\n", + "\n", + " * `dict` (and `collections.OrderedDict`)\n", + " * `list`\n", + " * `tuple` (and `collections.namedtuple`, `typing.NamedTuple`)\n", + "\n", + "Other container types are **not supported**, including:\n", + "\n", + " * `collections.defaultdict`\n", + " * `set`\n", + "\n", + "All other Python objects are **ignored**, including:\n", + "\n", + " * `int`\n", + " * `string`\n", + " * `float`\n" ] }, { diff --git a/site/en/guide/core/distribution.ipynb b/site/en/guide/core/distribution.ipynb new file mode 100644 index 00000000000..c7f13b2f4db --- /dev/null +++ b/site/en/guide/core/distribution.ipynb @@ -0,0 +1,700 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Distributed training with Core APIs and DTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) and [DTensor](https://www.tensorflow.org/guide/dtensor_overview) to demonstrate a data parallel distributed training example. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases. Refer to the [DTensor Overview](https://www.tensorflow.org/guide/dtensor_overview) guide and [Distributed Training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) tutorial to learn more about DTensor.\n", + "\n", + "This example uses the same model and optimizer shown in the [multilayer perceptrons](https://www.tensorflow.org/guide/core/mlp_core) tutorial. See this tutorial first to get comfortable with writing an end-to-end machine learning workflow with the Core APIs.\n", + "\n", + "Note: DTensor is still an experimental TensorFlow API which means that its features are available for testing, and it is intended for use in test environments only." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d_OFkG0dyWCp" + }, + "source": [ + "## Overview of data parallel training with DTensor\n", + "\n", + "Before building an MLP that supports distribution, take a moment to explore the fundamentals of DTensor for data parallel training.\n", + "\n", + "DTensor allows you to run distributed training across devices to improve efficiency, reliability and scalability. DTensor distributes the program and tensors according to the sharding directives through a procedure called Single program, multiple data (SPMD) expansion. A variable of a `DTensor` aware layer is created as `dtensor.DVariable`, and the constructors of `DTensor` aware layer objects take additional `Layout` inputs in addition to the usual layer parameters.\n", + "\n", + "The main ideas for data parallel training are as follows:\n", + " - Model variables are replicated on N devices each.\n", + " - A global batch is split into N per-replica batches.\n", + " - Each per-replica batch is trained on the replica device.\n", + " - The gradient is reduced before weight up data is collectively performed on all replicas.\n", + " - Data parallel training provides nearly linear speed with respect to the number of devices" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor is part of TensorFlow 2.9.0 release." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "latuqlI_Yvoo" + }, + "outputs": [], + "source": [ + "#!pip install --quiet --upgrade --pre tensorflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "from tensorflow.experimental import dtensor\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vDH9-sy4sfPf" + }, + "source": [ + "Configure 8 virtual CPUs for this experiment. DTensor can also be used with GPU or TPU devices. Given that this notebook uses virtual devices, the speedup gained from distributed training is not noticeable. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H2iM-6J4s2D6" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(8)\n", + "\n", + "DEVICES = [f'CPU:{i}' for i in range(8)]\n", + "devices = tf.config.list_logical_devices('CPU')\n", + "device_names = [d.name for d in devices]\n", + "device_names" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## The MNIST Dataset\n", + "\n", + "The dataset is available from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/mnist). Split the data into training and testing sets. Only use 5000 examples for training and testing to save time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8h4fV_JCfPIX" + }, + "outputs": [], + "source": [ + "train_data, test_data = tfds.load(\"mnist\", split=['train[:5000]', 'test[:5000]'], batch_size=128, as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "twkJ35YB6tSi" + }, + "source": [ + "### Preprocessing the data\n", + "\n", + "Preprocess the data by reshaping it to be 2-dimensional and by rescaling it to fit into the unit interval, [0,1]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6Cmjhg0xCqbz" + }, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " # Reshaping the data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " # Rescaling the data\n", + " x = x/255\n", + " return x, y\n", + "\n", + "train_data, test_data = train_data.map(preprocess), test_data.map(preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Build the MLP \n", + "\n", + "Build an MLP model with DTensor aware layers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHW6Yvg2yS6H" + }, + "source": [ + "### The dense layer\n", + "\n", + "Start by creating a dense layer module that supports DTensor. The `dtensor.call_with_layout` function can be used to call a function that takes in a DTensor input and produces a DTensor output. 
This is useful for initializing a DTensor variable, `dtensor.DVariable`, with a TensorFlow supported function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IM0yJos25FG5" + }, + "outputs": [], + "source": [ + "class DenseLayer(tf.Module):\n", + "\n", + " def __init__(self, in_dim, out_dim, weight_layout, activation=tf.identity):\n", + " super().__init__()\n", + " # Initialize dimensions and the activation function\n", + " self.in_dim, self.out_dim = in_dim, out_dim\n", + " self.activation = activation\n", + "\n", + " # Initialize the DTensor weights using the Xavier scheme\n", + " uniform_initializer = tf.function(tf.random.stateless_uniform)\n", + " xavier_lim = tf.sqrt(6.)/tf.sqrt(tf.cast(self.in_dim + self.out_dim, tf.float32))\n", + " self.w = dtensor.DVariable(\n", + " dtensor.call_with_layout(\n", + " uniform_initializer, weight_layout,\n", + " shape=(self.in_dim, self.out_dim), seed=(22, 23),\n", + " minval=-xavier_lim, maxval=xavier_lim))\n", + " \n", + " # Initialize the bias with the zeros\n", + " bias_layout = weight_layout.delete([0])\n", + " self.b = dtensor.DVariable(\n", + " dtensor.call_with_layout(tf.zeros, bias_layout, shape=[out_dim]))\n", + "\n", + " def __call__(self, x):\n", + " # Compute the forward pass\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " return self.activation(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X-7MzpjgyHg6" + }, + "source": [ + "### The MLP sequential model\n", + "\n", + "Now create an MLP module that executes the dense layers sequentially." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6XisRWiCyHAb" + }, + "outputs": [], + "source": [ + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, layers):\n", + " self.layers = layers\n", + " \n", + " def __call__(self, x, preds=False): \n", + " # Execute the model's layers sequentially\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r5HZJ0kv-V3v" + }, + "source": [ + "Performing \"data-parallel\" training with DTensor is equivalent to `tf.distribute.MirroredStrategy`. To do this each device will run the same model on a shard of the data batch. So you'll need the following:\n", + "\n", + "* A `dtensor.Mesh` with a single `\"batch\"` dimension\n", + "* A `dtensor.Layout` for all the weights that replicates them across the mesh (using `dtensor.UNSHARDED` for each axis)\n", + "* A `dtensor.Layout` for the data that splits the batch dimension across the mesh\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Create a DTensor mesh that consists of a single batch dimension, where each device becomes a replica that receives a shard from the global batch. 
Use this mesh to instantiate an MLP model with the following architecture:\n",
+    "\n",
+    "Forward Pass: ReLU(784 x 700) x ReLU(700 x 500) x Softmax(500 x 10)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "VmlACuki3oPi"
+   },
+   "outputs": [],
+   "source": [
+    "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=DEVICES)\n",
+    "weight_layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n",
+    "\n",
+    "input_size = 784\n",
+    "hidden_layer_1_size = 700\n",
+    "hidden_layer_2_size = 500\n",
+    "output_size = 10\n",
+    "\n",
+    "mlp_model = MLP([\n",
+    "    DenseLayer(in_dim=input_size, out_dim=hidden_layer_1_size,\n",
+    "               weight_layout=weight_layout,\n",
+    "               activation=tf.nn.relu),\n",
+    "    DenseLayer(in_dim=hidden_layer_1_size, out_dim=hidden_layer_2_size,\n",
+    "               weight_layout=weight_layout,\n",
+    "               activation=tf.nn.relu),\n",
+    "    DenseLayer(in_dim=hidden_layer_2_size, out_dim=output_size,\n",
+    "               weight_layout=weight_layout)])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "tyBATDoRmDkg"
+   },
+   "source": [
+    "### Training metrics\n",
+    "\n",
+    "Use the cross-entropy loss function and accuracy metric for training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "rskOYA7FVCwg"
+   },
+   "outputs": [],
+   "source": [
+    "def cross_entropy_loss(y_pred, y):\n",
+    "  # Compute cross entropy loss with a sparse operation\n",
+    "  sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)\n",
+    "  return tf.reduce_mean(sparse_ce)\n",
+    "\n",
+    "def accuracy(y_pred, y):\n",
+    "  # Compute accuracy after extracting class predictions\n",
+    "  class_preds = tf.argmax(y_pred, axis=1)\n",
+    "  is_equal = tf.equal(y, class_preds)\n",
+    "  return tf.reduce_mean(tf.cast(is_equal, tf.float32))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "JSiNRhTOnKZr"
+   },
+   "source": [
+    "### Optimizer\n",
+    "\n",
+    "Using an optimizer can result in significantly faster convergence compared to standard gradient descent. The Adam optimizer is implemented below and has been configured to be compatible with DTensor. In order to use Keras optimizers with DTensor, refer to the experimental `tf.keras.dtensor.experimental.optimizers` module."
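+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "keras-dtensor-optimizer-note"
+   },
+   "source": [
+    "As a brief, hedged sketch of that alternative (the exact signature below is an assumption based on the experimental module named above, and it is not used in the rest of this tutorial), a Keras DTensor optimizer can be constructed directly from the mesh:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "keras-dtensor-optimizer-sketch"
+   },
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch only: the experimental Keras DTensor optimizers accept the mesh directly.\n",
+    "# The rest of this tutorial uses the hand-written Adam module defined in the next cell.\n",
+    "keras_adam = tf.keras.dtensor.experimental.optimizers.Adam(learning_rate=1e-3, mesh=mesh)"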
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-9kIAI_lfXDS" + }, + "outputs": [], + "source": [ + "class Adam(tf.Module):\n", + "\n", + " def __init__(self, model_vars, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize optimizer parameters and variable slots\n", + " self.model_vars = model_vars\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " # Initialize optimizer variable slots\n", + " for var in model_vars:\n", + " v = dtensor.DVariable(dtensor.call_with_layout(tf.zeros, var.layout, shape=var.shape))\n", + " s = dtensor.DVariable(dtensor.call_with_layout(tf.zeros, var.layout, shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + "\n", + " def apply_gradients(self, grads):\n", + " # Update the model variables given their gradients\n", + " for i, (d_var, var) in enumerate(zip(grads, self.model_vars)):\n", + " self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)\n", + " self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " self.t += 1.\n", + " return " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w54b7GtLfn1j" + }, + "source": [ + "### Data packing\n", + "\n", + "Start by writing a helper function for transferring data to the device. This function should use `dtensor.pack` to send (and only send) the shard of the global batch that is intended for a replica to the device backing the replica. For simplicity, assume a single-client application.\n", + "\n", + "Next, write a function that uses this helper function to pack the training data batches into DTensors sharded along the batch (first) axis. This ensures that DTensor evenly distributes the training data to the 'batch' mesh dimension. Note that in DTensor, the batch size always refers to the global batch size; therefore, the batch size should be chosen such that it can be divided evenly by the size of the batch mesh dimension. Additional DTensor APIs to simplify `tf.data` integration are planned, so please stay tuned." 
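+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "global-batch-size-note"
+   },
+   "source": [
+    "As a small illustrative check (an addition, not part of the original packing logic), confirm that the global batch size of 128 used later divides evenly across the 8 devices on the `'batch'` mesh dimension:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "global-batch-size-check"
+   },
+   "outputs": [],
+   "source": [
+    "# The global batch size must be divisible by the size of the 'batch' mesh dimension.\n",
+    "global_batch_size = 128  # Matches the batch size used in the training loop below.\n",
+    "num_batch_shards = len(DEVICES)\n",
+    "assert global_batch_size % num_batch_shards == 0\n",
+    "print(\"Per-replica batch size:\", global_batch_size // num_batch_shards)"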
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3Rx82djZ6ITm" + }, + "outputs": [], + "source": [ + "def repack_local_tensor(x, layout):\n", + " # Repacks a local Tensor-like to a DTensor with layout\n", + " # This function assumes a single-client application\n", + " x = tf.convert_to_tensor(x)\n", + " sharded_dims = []\n", + "\n", + " # For every sharded dimension, use tf.split to split the along the dimension.\n", + " # The result is a nested list of split-tensors in queue[0].\n", + " queue = [x]\n", + " for axis, dim in enumerate(layout.sharding_specs):\n", + " if dim == dtensor.UNSHARDED:\n", + " continue\n", + " num_splits = layout.shape[axis]\n", + " queue = tf.nest.map_structure(lambda x: tf.split(x, num_splits, axis=axis), queue)\n", + " sharded_dims.append(dim)\n", + "\n", + " # Now you can build the list of component tensors by looking up the location in\n", + " # the nested list of split-tensors created in queue[0].\n", + " components = []\n", + " for locations in layout.mesh.local_device_locations():\n", + " t = queue[0]\n", + " for dim in sharded_dims:\n", + " split_index = locations[dim] # Only valid on single-client mesh.\n", + " t = t[split_index]\n", + " components.append(t)\n", + "\n", + " return dtensor.pack(components, layout)\n", + "\n", + "def repack_batch(x, y, mesh):\n", + " # Pack training data batches into DTensors along the batch axis\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osEK3rqpYfKd" + }, + "source": [ + "### Training\n", + "\n", + "Write a traceable function that executes a single training step given a batch of data. This function does not require any special DTensor annotations. Also write a function that executes a test step and returns the appropriate performance metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZICEsDGuSbDD" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def train_step(model, x_batch, y_batch, loss, metric, optimizer):\n", + " # Execute a single training step\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " # Compute gradients and update the model's parameters\n", + " grads = tape.gradient(batch_loss, model.trainable_variables)\n", + " optimizer.apply_gradients(grads)\n", + " # Return batch loss and accuracy\n", + " batch_acc = metric(y_pred, y_batch)\n", + " return batch_loss, batch_acc\n", + "\n", + "@tf.function\n", + "def test_step(model, x_batch, y_batch, loss, metric):\n", + " # Execute a single testing step\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = metric(y_pred, y_batch)\n", + " return batch_loss, batch_acc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RjIDVTwwX-Mr" + }, + "source": [ + "Now, train the MLP model for 3 epochs with a batch size of 128." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oC85kuZgmh3q" + }, + "outputs": [], + "source": [ + "# Initialize the training loop parameters and structures\n", + "epochs = 3\n", + "batch_size = 128\n", + "train_losses, test_losses = [], []\n", + "train_accs, test_accs = [], []\n", + "optimizer = Adam(mlp_model.trainable_variables)\n", + "\n", + "# Format training loop\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_test, batch_accs_test = [], []\n", + "\n", + " # Iterate through training data\n", + " for x_batch, y_batch in train_data:\n", + " x_batch, y_batch = repack_batch(x_batch, y_batch, mesh)\n", + " batch_loss, batch_acc = train_step(mlp_model, x_batch, y_batch, cross_entropy_loss, accuracy, optimizer)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate through testing data\n", + " for x_batch, y_batch in test_data:\n", + " x_batch, y_batch = repack_batch(x_batch, y_batch, mesh)\n", + " batch_loss, batch_acc = test_step(mlp_model, x_batch, y_batch, cross_entropy_loss, accuracy)\n", + " # Keep track of batch-level testing\n", + " batch_losses_test.append(batch_loss)\n", + " batch_accs_test.append(batch_acc)\n", + "\n", + "# Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " test_loss, test_acc = tf.reduce_mean(batch_losses_test), tf.reduce_mean(batch_accs_test)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " test_losses.append(test_loss)\n", + " test_accs.append(test_acc)\n", + " print(f\"Epoch: {epoch}\")\n", + " print(f\"Training loss: {train_loss.numpy():.3f}, Training accuracy: {train_acc.numpy():.3f}\")\n", + " print(f\"Testing loss: {test_loss.numpy():.3f}, Testing accuracy: {test_acc.numpy():.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_RVmt43G12R" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Start by writing a plotting function to visualize the model's loss and accuracy during training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VXTCYVtNDjAM" + }, + "outputs": [], + "source": [ + "def plot_metrics(train_metric, test_metric, metric_type):\n", + " # Visualize metrics vs training Epochs\n", + " plt.figure()\n", + " plt.plot(range(len(train_metric)), train_metric, label = f\"Training {metric_type}\")\n", + " plt.plot(range(len(test_metric)), test_metric, label = f\"Testing {metric_type}\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(metric_type)\n", + " plt.legend()\n", + " plt.title(f\"{metric_type} vs Training Epochs\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "407qok7q2JIO" + }, + "outputs": [], + "source": [ + "plot_metrics(train_losses, test_losses, \"Cross entropy loss\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8H_TgxV92NfX" + }, + "outputs": [], + "source": [ + "plot_metrics(train_accs, test_accs, \"Accuracy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHO_u-3w4YRF" + }, + "source": [ + "## Saving your model\n", + "\n", + "The integration of `tf.saved_model` and DTensor is still under development. As of TensorFlow 2.9.0, tf.saved_model only accepts DTensor models with fully replicated variables. 
As a workaround, you can convert a DTensor model to a fully replicated one by reloading a checkpoint. However, after a model is saved, all DTensor annotations are lost and the saved signatures can only be used with regular Tensors. This tutorial will be updated to showcase the integration once it is solidified.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook provided an overview of distributed training with DTensor and the TensorFlow Core APIs. Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build highly-configurable machine learning workflows with support for distributed training.\n", + "- The [DTensor concepts](https://www.tensorflow.org/guide/dtensor_overview) guide and [Distributed training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) tutorial contain the most up-to-date information about DTensor and its integrations.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "distribution.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/index.md b/site/en/guide/core/index.md new file mode 100644 index 00000000000..7f44e11b018 --- /dev/null +++ b/site/en/guide/core/index.md @@ -0,0 +1,112 @@ +# TensorFlow Core APIs overview + +The TensorFlow Core APIs provide a set of comprehensive, composable, and +extensible low-level APIs for high-performance (distributed and accelerated) +computation, primarily aimed at building machine learning (ML) models as well as +authoring ML workflow tools and frameworks within the TensorFlow platform. These +APIs provide a foundation for creating highly configurable models with +fine-grained control and new frameworks from the ground up. + +The Core APIs can be used as an alternative to high-level machine learning APIs +like Keras. These high-level APIs are best suited for general machine learning +needs. They offer a variety of modules that abstract away the complexities of ML +while also offering functionalities for customization through subclassing. If +you are looking for an overview of TensorFlow using Keras, see the Quickstarts +and Keras sections in the [tutorials](https://www.tensorflow.org/tutorials). 
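+
+As a quick, illustrative sketch of the level these APIs operate at (this
+snippet is an addition for illustration and is not drawn from a specific
+guide), the loop below fits a single parameter using nothing but a
+`tf.Variable` and a `tf.GradientTape`:
+
+```python
+import tensorflow as tf
+
+# Minimize (w - 3)^2 with plain gradient descent using only Core APIs.
+w = tf.Variable(0.0)
+for _ in range(50):
+  with tf.GradientTape() as tape:
+    loss = (w - 3.0) ** 2
+  grad = tape.gradient(loss, w)
+  w.assign_sub(0.1 * grad)
+print(w.numpy())  # Approaches 3.0
+```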
+
+## Who should use the Core APIs
+
+The TensorFlow Core low-level APIs are designed with the following ML developers
+in mind:
+
+* Researchers building complex models with high levels of configurability
+* Developers interested in using TensorFlow as a high-performance scientific
+  computing platform
+* Framework authors building tools on top of the TensorFlow platform
+* High-level API users interested in:
+    * Adding additional functionalities to their machine learning workflows
+      such as custom layers, losses, models, and optimizers
+    * Learning more about the inner workings of their models
+
+## Core API applications
+
+The TensorFlow Core APIs provide access to low-level functionality within the
+TensorFlow ecosystem. These APIs provide more flexibility and control for
+building ML models, applications, and tools, compared to high-level APIs, such
+as Keras.
+
+### Build models and workflows
+
+The Core APIs are most commonly used to build highly customizable and optimized
+machine learning models and workflows. Here are some of the ways that the
+TensorFlow Core APIs can improve your machine learning models and workflow
+development:
+
+* Building non-traditional models or layers that do not fully fit the
+  structures supported by high-level APIs
+* Building custom layers, losses, models, and optimizers within Keras
+* Implementing new optimization techniques to expedite convergence during
+  training
+* Creating custom metrics for performance evaluation
+* Designing highly configurable training loops with support for features like
+  batching, cross-validation, and distribution strategies
+
+### Build frameworks and tools
+
+The TensorFlow Core APIs can also serve as the building blocks for new
+high-level frameworks. Here are some examples of tools and frameworks that are
+created with the low-level APIs:
+
+* [Keras](https://keras.io): deep learning for humans
+* [TensorFlow Model Optimization Toolkit](https://www.tensorflow.org/model_optimization):
+  a suite of tools to optimize ML models for deployment and execution
+* [TensorFlow Graphics](https://www.tensorflow.org/graphics): a library for
+  making useful graphics functions widely accessible
+
+### Build for scientific computing
+
+The TensorFlow Core APIs can also be applied outside the realm of machine
+learning. Here are a few general-purpose use cases of TensorFlow for scientific
+computing:
+
+* Physics simulations for solid mechanics and
+  [fluid dynamics](https://arxiv.org/abs/2108.11076) problems
+* Graphics rendering applications like
+  [ray tracing](https://github.com/BachiLi/redner)
+* Solving
+  [constrained optimization problems](https://github.com/google-research/tensorflow_constrained_optimization/blob/master/README.md)
+
+## Core API components
+
+Here are some of the fundamental components that comprise TensorFlow Core’s
+low-level APIs. Note that this is not an all-encompassing list:
+
+* Data structures: `tf.Tensor`, `tf.Variable`, `tf.TensorArray`
+* Primitive APIs: `tf.shape`,
+  [slicing](https://www.tensorflow.org/guide/tensor_slicing), `tf.concat`,
+  `tf.bitwise`
+* Numerical: `tf.math`, `tf.linalg`, `tf.random`
+* Functional components: `tf.function`, `tf.GradientTape`
+* Distribution: [DTensor](https://www.tensorflow.org/guide/dtensor_overview)
+* Export: `tf.saved_model`
+
+## Next steps
+
+The *Build with Core* documentation provides tutorials of basic machine learning
+concepts from scratch.
The tutorials in this section help you get comfortable +with writing low-level code with Core APIs that you can then apply to more +complex use cases of your own. + +Note: You should not use the Core APIs to simply re-implement high-level APIs, +and it is possible to use high-level APIs, such as Keras, with the Core APIs. + +To get started using and learning more about the Core APIs, check out the +[Quickstart for TensorFlow Core](https://www.tensorflow.org/guide/core/quickstart_core). diff --git a/site/en/guide/core/logistic_regression_core.ipynb b/site/en/guide/core/logistic_regression_core.ipynb new file mode 100644 index 00000000000..5a9af324ad5 --- /dev/null +++ b/site/en/guide/core/logistic_regression_core.ipynb @@ -0,0 +1,935 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Logistic regression for binary classification with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DauaqJ7WhIhO" + }, + "source": [ + "This guide demonstrates how to use the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to perform [binary classification](https://developers.google.com/machine-learning/glossary#binary_classification) with [logistic regression](https://developers.google.com/machine-learning/crash-course/logistic-regression/). It uses the [Wisconsin Breast Cancer Dataset](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)) for tumor classification.\n", + "\n", + "[Logistic regression](https://developers.google.com/machine-learning/crash-course/logistic-regression/) is one of the most popular algorithms for binary classification. Given a set of examples with features, the goal of logistic regression is to output values between 0 and 1, which can be interpreted as the probabilities of each example belonging to a particular class. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "This tutorial uses [pandas](https://pandas.pydata.org) for reading a CSV file into a [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html), [seaborn](https://seaborn.pydata.org) for plotting a pairwise relationship in a dataset, [Scikit-learn](https://scikit-learn.org/) for computing a confusion matrix, and [matplotlib](https://matplotlib.org/) for creating visualizations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5lZoUK6AVTos" + }, + "outputs": [], + "source": [ + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import sklearn.metrics as sk_metrics\n", + "import tempfile\n", + "import os\n", + "\n", + "# Preset matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]\n", + "\n", + "print(tf.__version__)\n", + "# To make the results reproducible, set the random seed value.\n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gFh9ne3FZ-On" + }, + "source": [ + "## Load the data\n", + "\n", + "Next, load the [Wisconsin Breast Cancer Dataset](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)) from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/). This dataset contains various features such as a tumor's radius, texture, and concavity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CiX2FI4gZtTt" + }, + "outputs": [], + "source": [ + "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'\n", + "\n", + "features = ['radius', 'texture', 'perimeter', 'area', 'smoothness', 'compactness',\n", + " 'concavity', 'concave_poinits', 'symmetry', 'fractal_dimension']\n", + "column_names = ['id', 'diagnosis']\n", + "\n", + "for attr in ['mean', 'ste', 'largest']:\n", + " for feature in features:\n", + " column_names.append(feature + \"_\" + attr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A3VR1aTP92nV" + }, + "source": [ + "Read the dataset into a pandas [DataFrame]() using [`pandas.read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uvR2Bzb691lJ" + }, + "outputs": [], + "source": [ + "dataset = pd.read_csv(url, names=column_names)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YB9eq6Zq-IZ4" + }, + "outputs": [], + "source": [ + "dataset.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0_Z1V6Dg-La_" + }, + "source": [ + "Display the first five rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hWxktwbv-KPp" + }, + "outputs": [], + "source": [ + "dataset.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s4-Wn2jzVC1W" + }, + "source": [ + "Split the dataset into training and test sets using [`pandas.DataFrame.sample`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html), [`pandas.DataFrame.drop`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html) and [`pandas.DataFrame.iloc`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html). Make sure to split the features from the target labels. The test set is used to evaluate your model's generalizability to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m2O60B-IVG9Q" + }, + "outputs": [], + "source": [ + "train_dataset = dataset.sample(frac=0.75, random_state=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i06vHFv_QB24" + }, + "outputs": [], + "source": [ + "len(train_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "19JaochhaQ3m" + }, + "outputs": [], + "source": [ + "test_dataset = dataset.drop(train_dataset.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LmHRcbAfaSag" + }, + "outputs": [], + "source": [ + "len(test_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w6JxBhBc_wwO" + }, + "outputs": [], + "source": [ + "# The `id` column can be dropped since each row is unique\n", + "x_train, y_train = train_dataset.iloc[:, 2:], train_dataset.iloc[:, 1]\n", + "x_test, y_test = test_dataset.iloc[:, 2:], test_dataset.iloc[:, 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3MWuJTKEDM-f" + }, + "source": [ + "## Preprocess the data\n", + "\n", + "This dataset contains the mean, standard error, and largest values for each of the 10 tumor measurements collected per example. The `\"diagnosis\"` target column is a categorical variable with `'M'` indicating a malignant tumor and `'B'` indicating a benign tumor diagnosis. 
This column needs to be converted into a numerical binary format for model training.\n", + "\n", + "The [`pandas.Series.map`](https://pandas.pydata.org/docs/reference/api/pandas.Series.map.html) function is useful for mapping binary values to the categories.\n", + "\n", + "The dataset should also be converted to a tensor with the `tf.convert_to_tensor` function after the preprocessing is complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JEJHhN65a2VV" + }, + "outputs": [], + "source": [ + "y_train, y_test = y_train.map({'B': 0, 'M': 1}), y_test.map({'B': 0, 'M': 1})\n", + "x_train, y_train = tf.convert_to_tensor(x_train, dtype=tf.float32), tf.convert_to_tensor(y_train, dtype=tf.float32)\n", + "x_test, y_test = tf.convert_to_tensor(x_test, dtype=tf.float32), tf.convert_to_tensor(y_test, dtype=tf.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J4ubs136WLNp" + }, + "source": [ + "Use [`seaborn.pairplot`](https://seaborn.pydata.org/generated/seaborn.pairplot.html) to review the joint distribution of a few pairs of mean-based features from the training set and observe how they relate to the target:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oRKO_x8gWKv-" + }, + "outputs": [], + "source": [ + "sns.pairplot(train_dataset.iloc[:, 1:6], hue = 'diagnosis', diag_kind='kde');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YOG5iKYKW_3" + }, + "source": [ + "This pairplot demonstrates that certain features such as radius, perimeter and area are highly correlated. This is expected since the tumor radius is directly involved in the computation of both perimeter and area. Additionally, note that malignant diagnoses seem to be more right-skewed for many of the features.\n", + "\n", + "Make sure to also check the overall statistics. Note how each feature covers a vastly different range of values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yi2FzC3T21jR" + }, + "outputs": [], + "source": [ + "train_dataset.describe().transpose()[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_8pDCIFjMla8" + }, + "source": [ + "Given the inconsistent ranges, it is beneficial to standardize the data such that each feature has a zero mean and unit variance. This process is called [normalization](https://developers.google.com/machine-learning/glossary#normalization)." 
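+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "standardization-toy-note"
+   },
+   "source": [
+    "To make the transformation concrete before wrapping it in a module (the toy tensor below is purely illustrative and not part of the dataset), subtract the per-column mean and divide by the per-column standard deviation; each column then has zero mean and unit variance:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "standardization-toy-example"
+   },
+   "outputs": [],
+   "source": [
+    "toy = tf.constant([[1., 10.], [2., 20.], [3., 30.]])\n",
+    "toy_norm = (toy - tf.reduce_mean(toy, axis=0)) / tf.math.reduce_std(toy, axis=0)\n",
+    "print(toy_norm)  # Each column now has mean 0 and standard deviation 1."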
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FrzKNFNjLQDl" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.Variable(tf.math.reduce_mean(x, axis=0))\n", + " self.std = tf.Variable(tf.math.reduce_std(x, axis=0))\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean\n", + "\n", + "norm_x = Normalize(x_train)\n", + "x_train_norm, x_test_norm = norm_x.norm(x_train), norm_x.norm(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Logistic regression\n", + "\n", + "Before building a logistic regression model, it is crucial to understand the method's differences compared to traditional linear regression.\n", + "\n", + "### Logistic regression fundamentals\n", + "\n", + "Linear regression returns a linear combination of its inputs; this output is unbounded. The output of a [logistic regression](https://developers.google.com/machine-learning/glossary#logistic_regression) is in the `(0, 1)` range. For each example, it represents the probability that the example belongs to the _positive_ class.\n", + "\n", + "Logistic regression maps the continuous outputs of traditional linear regression, `(-∞, ∞)`, to probabilities, `(0, 1)`. This transformation is also symmetric so that flipping the sign of the linear output results in the inverse of the original probability.\n", + "\n", + "Let $Y$ denote the probability of being in class `1` (the tumor is malignant). The desired mapping can be achieved by interpreting the linear regression output as the [log odds](https://developers.google.com/machine-learning/glossary#log-odds) ratio of being in class `1` as opposed to class `0`:\n", + "\n", + "$$\\ln(\\frac{Y}{1-Y}) = wX + b$$\n", + "\n", + "By setting $wX + b = z$, this equation can then be solved for $Y$:\n", + "\n", + "$$Y = \\frac{e^{z}}{1 + e^{z}} = \\frac{1}{1 + e^{-z}}$$\n", + "\n", + "The expression $\\frac{1}{1 + e^{-z}}$ is known as the [sigmoid function](https://developers.google.com/machine-learning/glossary#sigmoid_function) $\\sigma(z)$. Hence, the equation for logistic regression can be written as $Y = \\sigma(wX + b)$.\n", + "\n", + "The dataset in this tutorial deals with a high-dimensional feature matrix. Therefore, the above equation must be rewritten in a matrix vector form as follows:\n", + "\n", + "$${\\mathrm{Y}} = \\sigma({\\mathrm{X}}w + b)$$\n", + "\n", + "where:\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: a target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: a feature matrix\n", + "* $\\underset{n\\times 1}w$: a weight vector\n", + "* $b$: a bias\n", + "* $\\sigma$: a sigmoid function applied to each element of the output vector\n", + "\n", + "Start by visualizing the sigmoid function, which transforms the linear output, `(-∞, ∞)`, to fall between `0` and `1`. The sigmoid function is available in `tf.math.sigmoid`." 
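+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "sigmoid-symmetry-note"
+   },
+   "source": [
+    "Before plotting it, you can verify the symmetry property mentioned above with a quick check (an illustrative addition): flipping the sign of the linear output inverts the probability, so `sigmoid(z) + sigmoid(-z)` should always equal 1."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "sigmoid-symmetry-check"
+   },
+   "outputs": [],
+   "source": [
+    "z = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])\n",
+    "# sigmoid(-z) = 1 - sigmoid(z), so every entry below is 1.\n",
+    "print(tf.math.sigmoid(z) + tf.math.sigmoid(-z))"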
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ThHaV_RmucZl" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-10, 10, 500)\n", + "x = tf.cast(x, tf.float32)\n", + "f = lambda x : (1/20)*x + 0.6\n", + "plt.plot(x, tf.math.sigmoid(x))\n", + "plt.ylim((-0.1,1.1))\n", + "plt.title(\"Sigmoid function\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VMXEhrZuKECV" + }, + "source": [ + "### The log loss function\n", + "\n", + "The [log loss](https://developers.google.com/machine-learning/glossary#Log_Loss), or binary cross-entropy loss, is the ideal loss function for a binary classification problem with logistic regression. For each example, the log loss quantifies the similarity between a predicted probability and the example's true value. It is determined by the following equation:\n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}y_i\\cdot\\log(\\hat{y}_i) + (1- y_i)\\cdot\\log(1 - \\hat{y}_i)$$\n", + "\n", + "where:\n", + "\n", + "* $\\hat{y}$: a vector of predicted probabilities\n", + "* $y$: a vector of true targets\n", + "\n", + "You can use the `tf.nn.sigmoid_cross_entropy_with_logits` function to compute the log loss. This function automatically applies the sigmoid activation to the regression output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JVBInnSqS36W" + }, + "outputs": [], + "source": [ + "def log_loss(y_pred, y):\n", + " # Compute the log loss function\n", + " ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + " return tf.reduce_mean(ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q_mutLj0KNUb" + }, + "source": [ + "### The gradient descent update rule\n", + "\n", + "The TensorFlow Core APIs support automatic differentiation with `tf.GradientTape`. If you are curious about the mathematics behind the logistic regression [gradient updates](https://developers.google.com/machine-learning/glossary#gradient_descent), here is a short explanation:\n", + "\n", + "In the above equation for the log loss, recall that each $\\hat{y}_i$ can be rewritten in terms of the inputs as $\\sigma({\\mathrm{X_i}}w + b)$.\n", + "\n", + "The goal is to find a $w^*$ and $b^*$ that minimize the log loss:\n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}y_i\\cdot\\log(\\sigma({\\mathrm{X_i}}w + b)) + (1- y_i)\\cdot\\log(1 - \\sigma({\\mathrm{X_i}}w + b))$$\n", + "\n", + "By taking the gradient $L$ with respect to $w$, you get the following:\n", + "\n", + "$$\\frac{\\partial L}{\\partial w} = \\frac{1}{m}(\\sigma({\\mathrm{X}}w + b) - y)X$$\n", + "\n", + "By taking the gradient $L$ with respect to $b$, you get the following:\n", + "\n", + "$$\\frac{\\partial L}{\\partial b} = \\frac{1}{m}\\sum_{i=1}^{m}\\sigma({\\mathrm{X_i}}w + b) - y_i$$" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uTCndUecKZho" + }, + "source": [ + "Now, build the logistic regression model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c0sXM7qLlKfZ" + }, + "outputs": [], + "source": [ + "class LogisticRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + " \n", + " def __call__(self, x, train=True):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weights and the bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1], seed=22)\n", + " rand_b = tf.random.uniform(shape=[], seed=22)\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " # Compute the model output\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " z = tf.squeeze(z, axis=1)\n", + " if train:\n", + " return z\n", + " return tf.sigmoid(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eObQu9fDnXGL" + }, + "source": [ + "To validate, make sure the untrained model outputs values in the range of `(0, 1)` for a small subset of the training data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bIovC0Z4QHJ" + }, + "outputs": [], + "source": [ + "log_reg = LogisticRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QJ2ievISyf0p" + }, + "outputs": [], + "source": [ + "y_pred = log_reg(x_train_norm[:5], train=False)\n", + "y_pred.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PribnwDHUksC" + }, + "source": [ + "Next, write an accuracy function to calculate the proportion of correct classifications during training. In order to retrieve the classifications from the predicted probabilities, set a threshold for which all probabilities higher than the threshold belong to class `1`. This is a configurable hyperparameter that can be set to `0.5` as a default." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssnVcKg7oMe6" + }, + "outputs": [], + "source": [ + "def predict_class(y_pred, thresh=0.5):\n", + " # Return a tensor with `1` if `y_pred` > `0.5`, and `0` otherwise\n", + " return tf.cast(y_pred > thresh, tf.float32)\n", + "\n", + "def accuracy(y_pred, y):\n", + " # Return the proportion of matches between `y_pred` and `y`\n", + " y_pred = tf.math.sigmoid(y_pred)\n", + " y_pred_class = predict_class(y_pred)\n", + " check_equal = tf.cast(y_pred_class == y,tf.float32)\n", + " acc_val = tf.reduce_mean(check_equal)\n", + " return acc_val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J_0KHQ25_2dF" + }, + "source": [ + "### Train the model\n", + "\n", + "Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling. The API enables you to build complex input pipelines from simple, reusable pieces. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vJD7-4U0etqa" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train))\n", + "train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_norm, y_test))\n", + "test_dataset = test_dataset.shuffle(buffer_size=x_test.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sLiWZZPBSDip" + }, + "source": [ + "Now write a training loop for the logistic regression model. The loop utilizes the log loss function and its gradients with respect to the input in order to iteratively update the model's parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jNC3D1DGsGgK" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 200\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "train_accs, test_accs = [], []\n", + "\n", + "# Set up the training loop and begin training\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_test, batch_accs_test = [], []\n", + "\n", + " # Iterate over the training data\n", + " for x_batch, y_batch in train_dataset:\n", + " with tf.GradientTape() as tape:\n", + " y_pred_batch = log_reg(x_batch)\n", + " batch_loss = log_loss(y_pred_batch, y_batch)\n", + " batch_acc = accuracy(y_pred_batch, y_batch)\n", + " # Update the parameters with respect to the gradient calculations\n", + " grads = tape.gradient(batch_loss, log_reg.variables)\n", + " for g,v in zip(grads, log_reg.variables):\n", + " v.assign_sub(learning_rate * g)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate over the testing data\n", + " for x_batch, y_batch in test_dataset:\n", + " y_pred_batch = log_reg(x_batch)\n", + " batch_loss = log_loss(y_pred_batch, y_batch)\n", + " batch_acc = accuracy(y_pred_batch, y_batch)\n", + " # Keep track of batch-level testing performance\n", + " batch_losses_test.append(batch_loss)\n", + " batch_accs_test.append(batch_acc)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " test_loss, test_acc = tf.reduce_mean(batch_losses_test), tf.reduce_mean(batch_accs_test)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " test_losses.append(test_loss)\n", + " test_accs.append(test_acc)\n", + " if epoch % 20 == 0:\n", + " print(f\"Epoch: {epoch}, Training log loss: {train_loss:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NoLiAg7fYft7" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Observe the changes in your model's loss and accuracy over time. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mv3oCQPvWhr0" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_losses, label = \"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label = \"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Log loss\")\n", + "plt.legend()\n", + "plt.title(\"Log loss vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D2HDVGLPODIE" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_accs, label = \"Training accuracy\")\n", + "plt.plot(range(epochs), test_accs, label = \"Testing accuracy\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Accuracy (%)\")\n", + "plt.legend()\n", + "plt.title(\"Accuracy vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jonKhUzuPyfa" + }, + "outputs": [], + "source": [ + "print(f\"Final training log loss: {train_losses[-1]:.3f}\")\n", + "print(f\"Final testing log Loss: {test_losses[-1]:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3DF4qyrPyke" + }, + "outputs": [], + "source": [ + "print(f\"Final training accuracy: {train_accs[-1]:.3f}\")\n", + "print(f\"Final testing accuracy: {test_accs[-1]:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yrj1TbOJasjA" + }, + "source": [ + "The model demonstrates a high accuracy and a low loss when it comes to classifying tumors in the training dataset and also generalizes well to the unseen test data. To go one step further, you can explore error rates that give more insight beyond the overall accuracy score. The two most popular error rates for binary classification problems are the false positive rate (FPR) and the false negative rate (FNR).\n", + "\n", + "For this problem, the FPR is the proportion of malignant tumor predictions amongst tumors that are actually benign. 
Conversely, the FNR is the proportion of benign tumor predictions among tumors that are actually malignant.\n", + "\n", + "Compute a confusion matrix using [`sklearn.metrics.confusion_matrix`](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html#sklearn.metrics.confusion_matrix), which evaluates the accuracy of the classification, and use matplotlib to display the matrix:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OJO7YkA8ZDMU" + }, + "outputs": [], + "source": [ + "def show_confusion_matrix(y, y_classes, typ):\n", + " # Compute the confusion matrix and normalize it\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(y.numpy(), y_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(2)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(f\"Confusion matrix: {typ}\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "y_pred_train, y_pred_test = log_reg(x_train_norm, train=False), log_reg(x_test_norm, train=False)\n", + "train_classes, test_classes = predict_class(y_pred_train), predict_class(y_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OQ5DFcleiDFm" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_train, train_classes, 'Training')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gtfcsAp_iCNR" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_test, test_classes, 'Testing')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DlivxaDmTnGq" + }, + "source": [ + "Observe the error rate measurements and interpret their significance in the context of this example. In many medical testing studies such as cancer detection, having a high false positive rate to ensure a low false negative rate is perfectly acceptable and in fact encouraged since the risk of missing a malignant tumor diagnosis (false negative) is a lot worse than misclassifying a benign tumor as malignant (false positive).\n", + "\n", + "In order to control for the FPR and FNR, try changing the threshold hyperparameter before classifying the probability predictions. A lower threshold increases the model's overall chances of making a malignant tumor classification. This inevitably increases the number of false positives and the FPR but it also helps to decrease the number of false negatives and the FNR." 
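+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "threshold-example-note"
+   },
+   "source": [
+    "For example (the value of `0.3` here is purely illustrative), re-classify the test predictions with a lower threshold and compare the resulting confusion matrix with the default threshold of `0.5`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "threshold-example-code"
+   },
+   "outputs": [],
+   "source": [
+    "# Classify the test predictions again with a lower, illustrative threshold.\n",
+    "test_classes_low_thresh = predict_class(y_pred_test, thresh=0.3)\n",
+    "show_confusion_matrix(y_test, test_classes_low_thresh, 'Testing (threshold = 0.3)')"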
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7ADEN2rb4Nhj" + }, + "source": [ + "## Save the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Normalization\n", + "- Probability prediction\n", + "- Class prediction\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6KPRHCzg4ZxH" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, norm_x, class_pred):\n", + " # Initialize pre- and post-processing functions\n", + " self.model = model\n", + " self.norm_x = norm_x\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)])\n", + " def __call__(self, x):\n", + " # Run the `ExportModule` for new data points\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x, train=False)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2YzRclo5-yjO" + }, + "outputs": [], + "source": [ + "log_reg_export = ExportModule(model=log_reg,\n", + " norm_x=norm_x,\n", + " class_pred=predict_class)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtofGIBN_qFd" + }, + "source": [ + "If you want to save the model at its current state, you can do so with the `tf.saved_model.save` function. To load a saved model and make predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a4Qum1Ts_pmF" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'log_reg_export')\n", + "tf.saved_model.save(log_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3KPILr1i_M_c" + }, + "outputs": [], + "source": [ + "log_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = log_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vgGQuV-yqYZH" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a logistic regression problem. Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability\n", + "- Analyzing error rates is a great way to gain more insight about a classification model's performance beyond its overall accuracy score.\n", + "- Overfitting is another common problem for logistic regression models, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](../../tutorials/keras/overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](../../tutorials/load_data/images.ipynb) or [CSV data loading](../../tutorials/load_data/csv.ipynb)." 
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "logistic_regression_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/matrix_core.ipynb b/site/en/guide/core/matrix_core.ipynb new file mode 100644 index 00000000000..1d7d35ed047 --- /dev/null +++ b/site/en/guide/core/matrix_core.ipynb @@ -0,0 +1,731 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Matrix approximation with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qGw8TF2vtzru" + }, + "source": [ + "## Introduction \n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to showcase TensorFlow's capabilities as a high-performance scientific computing platform. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases.\n", + "\n", + "This tutorial explores the technique of [singular value decomposition](https://developers.google.com/machine-learning/recommendation/collaborative/matrix) (SVD) and its applications for low-rank approximation problems. The SVD is used to factorize real or complex matrices and has a variety of use cases in data science such as image compression. The images for this tutorial come from Google Brain's [Imagen](https://imagen.research.google/) project. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5_FdwaovEkCC" + }, + "source": [ + ">![svd_intro](http://tensorflow.org/images/core/svd_intro.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib.image import imread\n", + "from matplotlib import pyplot as plt\n", + "import requests\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [16, 9]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "so_ewq3gAoEI" + }, + "source": [ + "## SVD fundamentals\n", + "\n", + "The singular value decomposition of a matrix, ${\\mathrm{A}}$, is determined by the following factorization:\n", + "\n", + "$${\\mathrm{A}} = {\\mathrm{U}} \\Sigma {\\mathrm{V}}^T$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m \\times n}{\\mathrm{A}}$: input matrix where $m \\geq n$\n", + "* $\\underset{m \\times n}{\\mathrm{U}}$: orthogonal matrix, ${\\mathrm{U}}^T{\\mathrm{U}} = {\\mathrm{I}}$, with each column, $u_i$, denoting a left singular vector of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{\\Sigma}$: diagonal matrix with each diagonal entry, $\\sigma_i$, denoting a singular value of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{{\\mathrm{V}}^T}$: orthogonal matrix, ${\\mathrm{V}}^T{\\mathrm{V}} = {\\mathrm{I}}$, with each row, $v_i$, denoting a right singular vector of ${\\mathrm{A}}$\n", + "\n", + "When $m < n$, ${\\mathrm{U}}$ and $\\Sigma$ both have dimension $(m \\times m)$, and ${\\mathrm{V}}^T$ has dimension $(m \\times n)$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "enGGGXCQKNv8" + }, + "source": [ + ">![svd_full](http://tensorflow.org/images/core/svd_full.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NlP-cBdSKLtc" + }, + "source": [ + "TensorFlow's linear algebra package has a function, `tf.linalg.svd`, which can be used to compute the singular value decomposition of one or more matrices. 
Start by defining a simple matrix and computing its SVD factorization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C3QAcgyoeIpv" + }, + "outputs": [], + "source": [ + "A = tf.random.uniform(shape=[40,30])\n", + "# Compute the SVD factorization\n", + "s, U, V = tf.linalg.svd(A)\n", + "# Define Sigma and V Transpose\n", + "S = tf.linalg.diag(s)\n", + "V_T = tf.transpose(V)\n", + "# Reconstruct the original matrix\n", + "A_svd = U@S@V_T\n", + "# Visualize \n", + "plt.bar(range(len(s)), s);\n", + "plt.xlabel(\"Singular value rank\")\n", + "plt.ylabel(\"Singular value\")\n", + "plt.title(\"Bar graph of singular values\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6H_C9WhFACm4" + }, + "source": [ + "The `tf.einsum` function can be used to directly compute the matrix reconstruction from the outputs of `tf.linalg.svd`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TPE6QeMtADUn" + }, + "outputs": [], + "source": [ + "A_svd = tf.einsum('s,us,vs -> uv',s,U,V)\n", + "print('\\nReconstructed Matrix, A_svd', A_svd)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x1m6JIsM9DLP" + }, + "source": [ + "## Low rank approximation with the SVD\n", + "\n", + "The rank of a matrix, ${\\mathrm{A}}$, is determined by the dimension of the vector space spanned by its columns. \n", + "The SVD can be used to approximate a matrix with a lower rank, which ultimately decreases the dimensionality of data required to store the information represented by the matrix.\n", + "\n", + "The rank-r approximation of ${\\mathrm{A}}$ in terms of the SVD is defined by the formula:\n", + "\n", + "$${\\mathrm{A_r}} = {\\mathrm{U_r}} \\Sigma_r {\\mathrm{V_r}}^T$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m \\times r}{\\mathrm{U_r}}$: matrix consisting of the first $r$ columns of ${\\mathrm{U}}$\n", + "* $\\underset{r \\times r}{\\Sigma_r}$: diagonal matrix consisting of the first $r$ singular values in $\\Sigma$\n", + "* $\\underset{r \\times n}{\\mathrm{V_r}}^T$: matrix consisting of the first $r$ rows of ${\\mathrm{V}}^T$" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nJWMJu36QyUV" + }, + "source": [ + ">![svd_approx](http://tensorflow.org/images/core/svd_approx.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TkiVUxeaQybq" + }, + "source": [ + "Start by writing a function to compute the rank-r approximation of a given matrix. This low-rank approximation procedure is used for image compression; therefore, it is also helpful to compute the physical data sizes for each approximation. For simplicity, assume that data size for an rank-r approximated matrix is equal to the total number of elements required to compute the approximation. Next, write a function to visualize the original matrix, $\\mathrm{A}$ its rank-r approximation, $\\mathrm{A}_r$ and the error matrix, $|\\mathrm{A} - \\mathrm{A}_r|$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2oY3pMPagJrO" + }, + "outputs": [], + "source": [ + "def rank_r_approx(s, U, V, r, verbose=False):\n", + " # Compute the matrices necessary for a rank-r approximation\n", + " s_r, U_r, V_r = s[..., :r], U[..., :, :r], V[..., :, :r] # ... 
implies any number of extra batch axes\n", + " # Compute the low-rank approximation and its size\n", + " A_r = tf.einsum('...s,...us,...vs->...uv',s_r,U_r,V_r)\n", + " A_r_size = tf.size(U_r) + tf.size(s_r) + tf.size(V_r)\n", + " if verbose:\n", + " print(f\"Approximation Size: {A_r_size}\")\n", + " return A_r, A_r_size\n", + "\n", + "def viz_approx(A, A_r):\n", + " # Plot A, A_r, and A - A_r\n", + " vmin, vmax = 0, tf.reduce_max(A)\n", + " fig, ax = plt.subplots(1,3)\n", + " mats = [A, A_r, abs(A - A_r)]\n", + " titles = ['Original A', 'Approximated A_r', 'Error |A - A_r|']\n", + " for i, (mat, title) in enumerate(zip(mats, titles)):\n", + " ax[i].pcolormesh(mat, vmin=vmin, vmax=vmax)\n", + " ax[i].set_title(title)\n", + " ax[i].axis('off')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O3ZRkYCkX2FQ" + }, + "outputs": [], + "source": [ + "print(f\"Original Size of A: {tf.size(A)}\")\n", + "s, U, V = tf.linalg.svd(A)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S1DR83VMX4cM" + }, + "outputs": [], + "source": [ + "# Rank-15 approximation\n", + "A_15, A_15_size = rank_r_approx(s, U, V, 15, verbose = True)\n", + "viz_approx(A, A_15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KgFT70XFX57E" + }, + "outputs": [], + "source": [ + "# Rank-3 approximation\n", + "A_3, A_3_size = rank_r_approx(s, U, V, 3, verbose = True)\n", + "viz_approx(A, A_3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DS4XoSlTJgX0" + }, + "source": [ + "As expected, using lower ranks results in less-accurate approximations. However, the quality of these low-rank approximations are often good enough in real world scenarios. Also note that the main goal of low-rank approximation with SVD \n", + "is to reduce the dimensionality of the data but not to reduce the disk space of the data itself. However, as the input matrices become higher-dimensional, many low-rank approximations also end up benefiting from reduced data size. This reduction benefit is why the process is applicable for image compression problems." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhsaiOnnZs6M" + }, + "source": [ + "## Image loading\n", + "\n", + "The following image is available on the [Imagen](https://imagen.research.google/) home page. Imagen is a text-to-image diffusion model developed by Google Research's Brain team. An AI created this image based on the prompt: \"A photo of a Corgi dog riding a bike in Times Square. It is wearing sunglasses and a beach hat.\" How cool is that! You can also change the url below to any .jpg link to load in a custom image of choice. \n", + "\n", + "Start by reading in and visualizing the image. After reading a JPEG file, Matplotlib outputs a matrix, ${\\mathrm{I}}$, of shape $(m \\times n \\times 3)$ which represents a 2-dimensional image with 3 color channels for red, green and blue respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OVsZOQUAZ2C7" + }, + "outputs": [], + "source": [ + "img_link = \"https://imagen.research.google/main_gallery_images/a-photo-of-a-corgi-dog-riding-a-bike-in-times-square.jpg\"\n", + "img_path = requests.get(img_link, stream=True).raw\n", + "I = imread(img_path, 0)\n", + "print(\"Input Image Shape:\", I.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qvs7uftcZ54x" + }, + "outputs": [], + "source": [ + "def show_img(I):\n", + "  # Display the image in matplotlib\n", + "  img = plt.imshow(I)\n", + "  plt.axis('off')\n", + "  return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZbesXO3HZ6Qs" + }, + "outputs": [], + "source": [ + "show_img(I)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tdnUBVg_JoOa" + }, + "source": [ + "## The image compression algorithm\n", + "\n", + "Now, use the SVD to compute low-rank approximations of the sample image. Recall that the image is of shape $(1024 \times 1024 \times 3)$ and that the SVD is only defined for 2-dimensional matrices. This means that the sample image has to be batched into 3 equal-size matrices that correspond to each of the 3 color channels. This can be done by transposing the matrix to be of shape $(3 \times 1024 \times 1024)$. In order to clearly visualize the approximation error, rescale the RGB values of the image from $[0,255]$ to $[0,1]$. Remember to clip the approximated values to fall within this interval before visualizing them. The `tf.clip_by_value` function is useful for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i7DDp0h7oSIk" + }, + "outputs": [], + "source": [ + "def compress_image(I, r, verbose=False):\n", + "  # Compress an image with the SVD given a rank \n", + "  I_size = tf.size(I)\n", + "  print(f\"Original size of image: {I_size}\")\n", + "  # Compute SVD of image\n", + "  I = tf.convert_to_tensor(I)/255\n", + "  I_batched = tf.transpose(I, [2, 0, 1]) # einops.rearrange(I, 'h w c -> c h w')\n", + "  s, U, V = tf.linalg.svd(I_batched)\n", + "  # Compute low-rank approximation of image across each RGB channel\n", + "  I_r, I_r_size = rank_r_approx(s, U, V, r)\n", + "  I_r = tf.transpose(I_r, [1, 2, 0]) # einops.rearrange(I_r, 'c h w -> h w c')\n", + "  I_r_prop = (I_r_size / I_size)\n", + "  if verbose:\n", + "    # Display compressed image and attributes\n", + "    print(f\"Number of singular values used in compression: {r}\")\n", + "    print(f\"Compressed image size: {I_r_size}\")\n", + "    print(f\"Proportion of original size: {I_r_prop:.3f}\")\n", + "    ax_1 = plt.subplot(1,2,1)\n", + "    show_img(tf.clip_by_value(I_r,0.,1.))\n", + "    ax_1.set_title(\"Approximated image\")\n", + "    ax_2 = plt.subplot(1,2,2)\n", + "    show_img(tf.clip_by_value(0.5+abs(I-I_r),0.,1.))\n", + "    ax_2.set_title(\"Error\")\n", + "  return I_r, I_r_prop" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGQ_rTyKDX9F" + }, + "source": [ + "Now, compute rank-r approximations for the following ranks: 100, 50, 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7GlKkVLGDjre" + }, + "outputs": [], + "source": [ + "I_100, I_100_prop = compress_image(I, 100, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XdvUkF5_E75D" + }, + "outputs": [], + "source": [ + "I_50, I_50_prop = compress_image(I, 50, verbose=True)" + ] + }, + {
+ "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MsCNZ8416Sbk" + }, + "outputs": [], + "source": [ + "I_10, I_10_prop = compress_image(I, 10, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfYYBhcuNkvH" + }, + "source": [ + "## Evaluating approximations\n", + "\n", + "There are a variety of interesting methods to measure the effectiveness and have more control over matrix approximations." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D2Lotde9Zg7v" + }, + "source": [ + "### Compression factor vs rank\n", + "\n", + "For each of the above approximations, observe how the data sizes change with the rank." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O1ariNQe6Wbl" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11,6))\n", + "plt.plot([100, 50, 10], [I_100_prop, I_50_prop, I_10_prop])\n", + "plt.xlabel(\"Rank\")\n", + "plt.ylabel(\"Proportion of original image size\")\n", + "plt.title(\"Compression factor vs rank\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dvHcLRj2QoDg" + }, + "source": [ + "Based on this plot, there is a linear relationship between an approximated image's compression factor and its rank. To explore this further, recall that the data size of an approximated matrix, ${\\mathrm{A}}_r$, is defined as the total number of elements required for its computation. The following equations can be used to find the relationship between compression factor and rank:\n", + "\n", + "$$x = (m \\times r) + r + (r \\times n) = r \\times (m + n + 1)$$\n", + "\n", + "$$c = \\large \\frac{x}{y} = \\frac{r \\times (m + n + 1)}{m \\times n}$$\n", + "\n", + "where\n", + "\n", + "* $x$: size of ${\\mathrm{A_r}}$\n", + "* $y$: size of ${\\mathrm{A}}$\n", + "* $c = \\frac{x}{y}$: compression factor\n", + "* $r$: rank of the approximation\n", + "* $m$ and $n$: row and column dimensions of ${\\mathrm{A}}$\n", + "\n", + "In order to find the rank, $r$, that is necessary to compress an image to a desired factor, $c$, the above equation can be rearranged to solve for $r$:\n", + "\n", + "$$r = ⌊{\\large\\frac{c \\times m \\times n}{m + n + 1}}⌋$$\n", + "\n", + "Note that this formula is independent of the color channel dimension since each of the RGB approximations do not affect each other. Now, write a function to compress an input image given a desired compression factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "viVO-I60QynI" + }, + "outputs": [], + "source": [ + "def compress_image_with_factor(I, compression_factor, verbose=False):\n", + " # Returns a compressed image based on a desired compression factor\n", + " m,n,o = I.shape\n", + " r = int((compression_factor * m * n)/(m + n + 1))\n", + " I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n", + " return I_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gWSv58J6LSRQ" + }, + "source": [ + "Compress an image to 15% of its original size." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HVeeloIwQ1b6" + }, + "outputs": [], + "source": [ + "compression_factor = 0.15\n", + "I_r_img = compress_image_with_factor(I, compression_factor, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkeRyms7jZMd" + }, + "source": [ + "### Cumulative sum of singular values\n", + "\n", + "The cumulative sum of singular values can be a useful indicator for the amount of energy captured by a rank-r approximation. Visualize the RGB-averaged cumulative proportion of singular values in the sample image. The `tf.cumsum` function can be useful for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CteJ6VbKlndu" + }, + "outputs": [], + "source": [ + "def viz_energy(I):\n", + " # Visualize the energy captured based on rank\n", + " # Computing SVD\n", + " I = tf.convert_to_tensor(I)/255\n", + " I_batched = tf.transpose(I, [2, 0, 1]) \n", + " s, U, V = tf.linalg.svd(I_batched)\n", + " # Plotting average proportion across RGB channels \n", + " props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n", + " props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n", + " plt.figure(figsize=(11,6))\n", + " plt.plot(range(len(I)), props_rgb_mean, color='k')\n", + " plt.xlabel(\"Rank / singular value number\")\n", + " plt.ylabel(\"Cumulative proportion of singular values\")\n", + " plt.title(\"RGB-averaged proportion of energy captured by the first 'r' singular values\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vl9PKow-GgCp" + }, + "outputs": [], + "source": [ + "viz_energy(I)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vQtwimKuQP19" + }, + "source": [ + "It looks like over 90% of the energy in this image is captured within the first 100 singular values. Now, write a function to compress an input image given a desired energy retention factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fum5Cvm7R5vH" + }, + "outputs": [], + "source": [ + "def compress_image_with_energy(I, energy_factor, verbose=False):\n", + " # Returns a compressed image based on a desired energy factor\n", + " # Computing SVD\n", + " I_rescaled = tf.convert_to_tensor(I)/255\n", + " I_batched = tf.transpose(I_rescaled, [2, 0, 1]) \n", + " s, U, V = tf.linalg.svd(I_batched)\n", + " # Extracting singular values\n", + " props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n", + " props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n", + " # Find closest r that corresponds to the energy factor\n", + " r = tf.argmin(tf.abs(props_rgb_mean - energy_factor)) + 1\n", + " actual_ef = props_rgb_mean[r]\n", + " I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n", + " print(f\"Proportion of energy captured by the first {r} singular values: {actual_ef:.3f}\")\n", + " return I_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y_rChG0OLby1" + }, + "source": [ + "Compress an image to retain 75% of its energy." 
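Note that `tf.argmin` over the absolute difference selects the rank whose cumulative energy is *closest* to the target, which can land slightly below it. If the smallest rank that meets or exceeds the target is preferred, one possible alternative (a sketch, assuming a monotonically increasing `props_rgb_mean` computed as in `viz_energy` above) is `tf.searchsorted`:

```python
# Hypothetical alternative: smallest rank whose cumulative energy >= the target.
energy_factor = 0.75
r = int(tf.searchsorted(props_rgb_mean, [energy_factor], side='left')[0]) + 1
print(f"Smallest rank with at least {energy_factor:.0%} of the energy: {r}")
```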
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xDXBaZQ4c5jF" + }, + "outputs": [], + "source": [ + "energy_factor = 0.75\n", + "I_r_img = compress_image_with_energy(I, energy_factor, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2tmqTW0CYX-v" + }, + "source": [ + "### Error and singular values\n", + "\n", + "There is also an interesting relationship between the approximation error and the singular values. It turns out that the squared Frobenius norm of the approximation error is equal to the sum of the squares of the singular values that were left out:\n", + "\n", + "$${||A - A_r||}^2 = \sum_{i=r+1}^{R}σ_i^2$$\n", + "\n", + "Test out this relationship with a rank-10 approximation of the example matrix in the beginning of this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hctOvN8BckiS" + }, + "outputs": [], + "source": [ + "s, U, V = tf.linalg.svd(A)\n", + "A_10, A_10_size = rank_r_approx(s, U, V, 10)\n", + "squared_norm = tf.norm(A - A_10)**2\n", + "s_squared_sum = tf.reduce_sum(s[10:]**2)\n", + "print(f\"Squared Frobenius norm: {squared_norm:.3f}\")\n", + "print(f\"Sum of squared singular values left out: {s_squared_sum:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vgGQuV-yqYZH" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced the process of implementing the singular value decomposition with TensorFlow and applying it to write an image compression algorithm. Here are a few more tips that may help:\n", + "\n", + "* The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be utilized for a variety of high-performance scientific computing use cases.\n", + "* To learn more about TensorFlow's linear algebra functionalities, visit the docs for the [linalg module](https://www.tensorflow.org/api_docs/python/tf/linalg).\n", + "* The SVD can also be applied to build [recommendation systems](https://developers.google.com/machine-learning/recommendation/labs/movie-rec-programming-exercise).\n", + "\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "matrix_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/mlp_core.ipynb b/site/en/guide/core/mlp_core.ipynb new file mode 100644 index 00000000000..a5975c20c6e --- /dev/null +++ b/site/en/guide/core/mlp_core.ipynb @@ -0,0 +1,964 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Multilayer perceptrons for digit recognition with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build an end-to-end machine learning workflow for handwritten digit classification with [multilayer perceptrons](https://developers.google.com/machine-learning/crash-course/introduction-to-neural-networks/anatomy) and the [MNIST dataset](http://yann.lecun.com/exdb/mnist). Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GHVMVIFHSzl1" + }, + "source": [ + "## Multilayer perceptron (MLP) overview\n", + "\n", + "The Multilayer Perceptron (MLP) is a type of feedforward neural network used to approach [multiclass classification](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/video-lecture) problems. Before building an MLP, it is crucial to understand the concepts of perceptrons, layers, and activation functions.\n", + "\n", + "Multilayer Perceptrons are made up of functional units called perceptrons. The equation of a perceptron is as follows:\n", + "\n", + "$$Z = \\vec{w}⋅\\mathrm{X} + b$$\n", + "\n", + "where\n", + "\n", + "* $Z$: perceptron output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\vec{w}$: weight vector\n", + "* $b$: bias\n", + "\n", + "When these perceptrons are stacked, they form structures called dense layers which can then be connected to build a neural network. A dense layer's equation is similar to that of a perceptron's but uses a weight matrix and a bias vector instead: \n", + "\n", + "$$Z = \\mathrm{W}⋅\\mathrm{X} + \\vec{b}$$\n", + "\n", + "where\n", + "\n", + "* $Z$: dense layer output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\mathrm{W}$: weight matrix\n", + "* $\\vec{b}$: bias vector\n", + "\n", + "\n", + "In an MLP, multiple dense layers are connected in such a way that the outputs of one layer are fully connected to the inputs of the next layer. Adding non-linear activation functions to the outputs of dense layers can help the MLP classifier learn complex decision boundaries and generalize well to unseen data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "Import TensorFlow, [pandas](https://pandas.pydata.org), [Matplotlib](https://matplotlib.org) and [seaborn](https://seaborn.pydata.org) to get started." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mSfgqmwBagw_" + }, + "outputs": [], + "source": [ + "# Use seaborn for countplot.\n", + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import tempfile\n", + "import os\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## Load the data\n", + "\n", + "This tutorial uses the [MNIST dataset](http://yann.lecun.com/exdb/mnist), and demonstrates how to build an MLP model that can classify handwritten digits. The dataset is available from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/mnist).\n", + "\n", + "Split the MNIST dataset into training, validation, and testing sets. The validation set can be used to gauge the model's generalizability during training so that the test set can serve as a final unbiased estimator for the model's performance.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uiuh0B098_3p" + }, + "outputs": [], + "source": [ + "train_data, val_data, test_data = tfds.load(\"mnist\", \n", + " split=['train[10000:]', 'train[0:10000]', 'test'],\n", + " batch_size=128, as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X9uN3Lf6ANtn" + }, + "source": [ + "The MNIST dataset consists of handwritten digits and their corresponding true labels. Visualize a couple of examples below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6V8hSqJ7AMjQ" + }, + "outputs": [], + "source": [ + "x_viz, y_viz = tfds.load(\"mnist\", split=['train[:1500]'], batch_size=-1, as_supervised=True)[0]\n", + "x_viz = tf.squeeze(x_viz, axis=3)\n", + "\n", + "for i in range(9):\n", + " plt.subplot(3,3,1+i)\n", + " plt.axis('off')\n", + " plt.imshow(x_viz[i], cmap='gray')\n", + " plt.title(f\"True Label: {y_viz[i]}\")\n", + " plt.subplots_adjust(hspace=.5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bRald9dSE4qS" + }, + "source": [ + "Also review the distribution of digits in the training data to verify that each class is well represented in the dataset.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rj3K4XgQE7qR" + }, + "outputs": [], + "source": [ + "sns.countplot(x=y_viz.numpy());\n", + "plt.xlabel('Digits')\n", + "plt.title(\"MNIST Digit Distribution\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x_Wt4bDx_BRV" + }, + "source": [ + "## Preprocess the data\n", + "\n", + "First, reshape the feature matrices to be 2-dimensional by flattening the images. Next, rescale the data so that the pixel values of [0,255] fit into the range of [0,1]. This step ensures that the input pixels have similar distributions and helps with training convergence." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JSyCm2V2_AvI" + }, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " # Reshaping the data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " # Rescaling the data\n", + " x = x/255\n", + " return x, y\n", + "\n", + "train_data, val_data = train_data.map(preprocess), val_data.map(preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Build the MLP \n", + "\n", + "Start by visualizing the [ReLU](https://developers.google.com/machine-learning/glossary#ReLU) and [Softmax](https://developers.google.com/machine-learning/glossary#softmax) activation functions. Both functions are available in `tf.nn.relu` and `tf.nn.softmax` respectively. The ReLU is a non-linear activation function that outputs the input if it is positive and 0 otherwise: \n", + "\n", + "$$\\text{ReLU}(X) = max(0, X)$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hYunzt3UyT9G" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-2, 2, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.relu(x));\n", + "plt.xlabel('x')\n", + "plt.ylabel('ReLU(x)')\n", + "plt.title('ReLU activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fuGrM9jMwsRM" + }, + "source": [ + "The softmax activation function is a normalized exponential function that converts $m$ real numbers into a probability distribution with $m$ outcomes/classes. This is useful for predicting class probabilities from a neural network's output:\n", + "\n", + "$$\\text{Softmax}(X) = \\frac{e^{X}}{\\sum_{i=1}^{m}e^{X_i}}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fVM8pvhWwuwI" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-4, 4, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.softmax(x, axis=0));\n", + "plt.xlabel('x')\n", + "plt.ylabel('Softmax(x)')\n", + "plt.title('Softmax activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHW6Yvg2yS6H" + }, + "source": [ + "### The dense layer\n", + "\n", + "Create a class for the dense layer. By definition, the outputs of one layer are fully connected to the inputs of the next layer in an MLP. Therefore, the input dimension for a dense layer can be inferred based on the output dimension of its previous layer and does not need to be specified upfront during its initialization. The weights should also be initialized properly to prevent activation outputs from becoming too large or small. One of the most popular weight initialization methods is the Xavier scheme, where each element of the weight matrix is sampled in the following manner:\n", + "\n", + "$$W_{ij} \\sim \\text{Uniform}(-\\frac{\\sqrt{6}}{\\sqrt{n + m}},\\frac{\\sqrt{6}}{\\sqrt{n + m}})$$\n", + "\n", + "The bias vector can be initialized to zeros." 
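For a concrete sense of scale (a quick numeric check using this tutorial's first hidden layer dimensions, not part of the original text), a $784 \times 700$ weight matrix gets a Xavier limit of about 0.064, so every weight starts close to zero:

```python
# Hypothetical numeric check of the Xavier limit for a 784 -> 700 dense layer.
in_dim, out_dim = 784, 700
xavier_lim = (6 / (in_dim + out_dim)) ** 0.5
print(f"Weights drawn from Uniform(-{xavier_lim:.4f}, {xavier_lim:.4f})")  # ~0.0636
```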
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "re1SSFyBdMrS" + }, + "outputs": [], + "source": [ + "def xavier_init(shape):\n", + " # Computes the xavier initialization values for a weight matrix\n", + " in_dim, out_dim = shape\n", + " xavier_lim = tf.sqrt(6.)/tf.sqrt(tf.cast(in_dim + out_dim, tf.float32))\n", + " weight_vals = tf.random.uniform(shape=(in_dim, out_dim), \n", + " minval=-xavier_lim, maxval=xavier_lim, seed=22)\n", + " return weight_vals" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "otDFX4u6e6ml" + }, + "source": [ + "The Xavier initialization method can also be implemented with `tf.keras.initializers.GlorotUniform`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IM0yJos25FG5" + }, + "outputs": [], + "source": [ + "class DenseLayer(tf.Module):\n", + "\n", + " def __init__(self, out_dim, weight_init=xavier_init, activation=tf.identity):\n", + " # Initialize the dimensions and activation functions\n", + " self.out_dim = out_dim\n", + " self.weight_init = weight_init\n", + " self.activation = activation\n", + " self.built = False\n", + "\n", + " def __call__(self, x):\n", + " if not self.built:\n", + " # Infer the input dimension based on first call\n", + " self.in_dim = x.shape[1]\n", + " # Initialize the weights and biases\n", + " self.w = tf.Variable(self.weight_init(shape=(self.in_dim, self.out_dim)))\n", + " self.b = tf.Variable(tf.zeros(shape=(self.out_dim,)))\n", + " self.built = True\n", + " # Compute the forward pass\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " return self.activation(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X-7MzpjgyHg6" + }, + "source": [ + "Next, build a class for the MLP model that executes layers sequentially.\n", + "Remember that the model variables are only available after the first sequence of dense layer calls due to dimension inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6XisRWiCyHAb" + }, + "outputs": [], + "source": [ + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, layers):\n", + " self.layers = layers\n", + " \n", + " @tf.function\n", + " def __call__(self, x, preds=False): \n", + " # Execute the model's layers sequentially\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luXKup-43nd7" + }, + "source": [ + "Initialize a MLP model with the following architecture:\n", + "\n", + "- Forward Pass: ReLU(784 x 700) x ReLU(700 x 500) x Softmax(500 x 10)\n", + "\n", + "The softmax activation function does not need to be applied by the MLP. It is computed separately in the loss and prediction functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VmlACuki3oPi" + }, + "outputs": [], + "source": [ + "hidden_layer_1_size = 700\n", + "hidden_layer_2_size = 500\n", + "output_size = 10\n", + "\n", + "mlp_model = MLP([\n", + "    DenseLayer(out_dim=hidden_layer_1_size, activation=tf.nn.relu),\n", + "    DenseLayer(out_dim=hidden_layer_2_size, activation=tf.nn.relu),\n", + "    DenseLayer(out_dim=output_size)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tyBATDoRmDkg" + }, + "source": [ + "### Define the loss function\n", + "\n", + "The cross-entropy loss function is a great choice for multiclass classification problems since it measures the negative log-likelihood of the data according to the model's probability predictions. The higher the probability assigned to the true class, the lower the loss. The equation for the cross-entropy loss is as follows:\n", + "\n", + "$$L = -\frac{1}{n}\sum_{i=1}^{n}\sum_{j=1}^{m} {y_j}^{[i]}⋅\log(\hat{{y_j}}^{[i]})$$\n", + "\n", + "where\n", + "\n", + "* $\underset{n\times m}{\hat{y}}$: a matrix of predicted class distributions\n", + "* $\underset{n\times m}{y}$: a one hot encoded matrix of true classes\n", + "\n", + "The `tf.nn.sparse_softmax_cross_entropy_with_logits` function can be used to compute the cross-entropy loss. This function does not require the model's last layer to apply the softmax activation function nor does it require the class labels to be one hot encoded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rskOYA7FVCwg" + }, + "outputs": [], + "source": [ + "def cross_entropy_loss(y_pred, y):\n", + "  # Compute cross entropy loss with a sparse operation\n", + "  sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + "  return tf.reduce_mean(sparse_ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BvWxED1km8jh" + }, + "source": [ + "Write a basic accuracy function that calculates the proportion of correct classifications during training. In order to generate class predictions from softmax outputs, return the index that corresponds to the largest class probability. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jPJMWx2UgiBm" + }, + "outputs": [], + "source": [ + "def accuracy(y_pred, y):\n", + "  # Compute accuracy after extracting class predictions\n", + "  class_preds = tf.argmax(tf.nn.softmax(y_pred), axis=1)\n", + "  is_equal = tf.equal(y, class_preds)\n", + "  return tf.reduce_mean(tf.cast(is_equal, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JSiNRhTOnKZr" + }, + "source": [ + "### Train the model\n", + "\n", + "Using an optimizer can result in significantly faster convergence compared to standard gradient descent. The Adam optimizer is implemented below. Visit the [Optimizers](https://www.tensorflow.org/guide/core/optimizers_core) guide to learn more about designing custom optimizers with TensorFlow Core."
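Before implementing the optimizer, here is a quick sanity check of the cross-entropy loss defined above (a small example with made-up logits, not part of the original tutorial): the sparse operation on raw logits matches a manual `-log(softmax)` computation, which is why no one-hot encoding or final softmax layer is needed.

```python
# Hypothetical sanity check: sparse cross entropy on raw logits vs. manual computation.
logits = tf.constant([[2.0, 1.0, 0.5]])   # one example, three classes
label = tf.constant([2])                  # integer class label, not one-hot
sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits)
manual_ce = -tf.math.log(tf.nn.softmax(logits)[0, 2])
print(sparse_ce.numpy()[0], manual_ce.numpy())  # both ~1.96
```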
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iGIBDk3cAv6a" + }, + "outputs": [], + "source": [ + "class Adam:\n", + "\n", + " def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize optimizer parameters and variable slots\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " self.built = False\n", + " \n", + " def apply_gradients(self, grads, vars):\n", + " # Initialize variables on the first call\n", + " if not self.built:\n", + " for var in vars:\n", + " v = tf.Variable(tf.zeros(shape=var.shape))\n", + " s = tf.Variable(tf.zeros(shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + " self.built = True\n", + " # Update the model variables given their gradients\n", + " for i, (d_var, var) in enumerate(zip(grads, vars)):\n", + " self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)\n", + " self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " self.t += 1.\n", + " return " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osEK3rqpYfKd" + }, + "source": [ + "Now, write a custom training loop that updates the MLP parameters with mini-batch gradient descent. Using mini-batches for training provides both memory efficiency and faster convergence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CJLeY2ao1aw6" + }, + "outputs": [], + "source": [ + "def train_step(x_batch, y_batch, loss, acc, model, optimizer):\n", + " # Update the model state given a batch of data\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = acc(y_pred, y_batch)\n", + " grads = tape.gradient(batch_loss, model.variables)\n", + " optimizer.apply_gradients(grads, model.variables)\n", + " return batch_loss, batch_acc\n", + "\n", + "def val_step(x_batch, y_batch, loss, acc, model):\n", + " # Evaluate the model on given a batch of validation data\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = acc(y_pred, y_batch)\n", + " return batch_loss, batch_acc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oC85kuZgmh3q" + }, + "outputs": [], + "source": [ + "def train_model(mlp, train_data, val_data, loss, acc, optimizer, epochs):\n", + " # Initialize data structures\n", + " train_losses, train_accs = [], []\n", + " val_losses, val_accs = [], []\n", + "\n", + " # Format training loop and begin training\n", + " for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_val, batch_accs_val = [], []\n", + "\n", + " # Iterate over the training data\n", + " for x_batch, y_batch in train_data:\n", + " # Compute gradients and update the model's parameters\n", + " batch_loss, batch_acc = train_step(x_batch, y_batch, loss, acc, mlp, optimizer)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate over the validation data\n", + " for x_batch, y_batch in 
val_data:\n", + " batch_loss, batch_acc = val_step(x_batch, y_batch, loss, acc, mlp)\n", + " batch_losses_val.append(batch_loss)\n", + " batch_accs_val.append(batch_acc)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " val_loss, val_acc = tf.reduce_mean(batch_losses_val), tf.reduce_mean(batch_accs_val)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " val_losses.append(val_loss)\n", + " val_accs.append(val_acc)\n", + " print(f\"Epoch: {epoch}\")\n", + " print(f\"Training loss: {train_loss:.3f}, Training accuracy: {train_acc:.3f}\")\n", + " print(f\"Validation loss: {val_loss:.3f}, Validation accuracy: {val_acc:.3f}\")\n", + " return train_losses, train_accs, val_losses, val_accs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FvbfXlN5lwwB" + }, + "source": [ + "Train the MLP model for 10 epochs with batch size of 128. Hardware accelerators like GPUs or TPUs can also help speed up training time. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zPlT8QfxptYl" + }, + "outputs": [], + "source": [ + "train_losses, train_accs, val_losses, val_accs = train_model(mlp_model, train_data, val_data, \n", + " loss=cross_entropy_loss, acc=accuracy,\n", + " optimizer=Adam(), epochs=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_RVmt43G12R" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Start by writing a plotting function to visualize the model's loss and accuracy during training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VXTCYVtNDjAM" + }, + "outputs": [], + "source": [ + "def plot_metrics(train_metric, val_metric, metric_type):\n", + " # Visualize metrics vs training Epochs\n", + " plt.figure()\n", + " plt.plot(range(len(train_metric)), train_metric, label = f\"Training {metric_type}\")\n", + " plt.plot(range(len(val_metric)), val_metric, label = f\"Validation {metric_type}\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(metric_type)\n", + " plt.legend()\n", + " plt.title(f\"{metric_type} vs Training epochs\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DC-qIvZbHo0G" + }, + "outputs": [], + "source": [ + "plot_metrics(train_losses, val_losses, \"cross entropy loss\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P-w2xk2PIDve" + }, + "outputs": [], + "source": [ + "plot_metrics(train_accs, val_accs, \"accuracy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tbrJJaFrD_XR" + }, + "source": [ + "## Save and load the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Data preprocessing \n", + "- Probability prediction\n", + "- Class prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1sszfWuJJZoo" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, preprocess, class_pred):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.preprocess = preprocess\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None, None, None], dtype=tf.uint8)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x 
= self.preprocess(x)\n", + " y = self.model(x)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p8x6gjTDVi5d" + }, + "outputs": [], + "source": [ + "def preprocess_test(x):\n", + " # The export module takes in unprocessed and unlabeled data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " x = x/255\n", + " return x\n", + "\n", + "def class_pred_test(y):\n", + " # Generate class predictions from MLP output\n", + " return tf.argmax(tf.nn.softmax(y), axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vu9H5STrJzdo" + }, + "source": [ + "This export module can now be saved with the `tf.saved_model.save` function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fN9pPBQTKTe3" + }, + "outputs": [], + "source": [ + "mlp_model_export = ExportModule(model=mlp_model,\n", + " preprocess=preprocess_test,\n", + " class_pred=class_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "idS7rQKbKwRS" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'mlp_model_export')\n", + "tf.saved_model.save(mlp_model_export, save_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_zZxO8iqBGZ-" + }, + "source": [ + "Load the saved model with `tf.saved_model.load` and examine its performance on the unseen test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W5cwBTUqxldW" + }, + "outputs": [], + "source": [ + "mlp_loaded = tf.saved_model.load(save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bmv0u6j_b5OC" + }, + "outputs": [], + "source": [ + "def accuracy_score(y_pred, y):\n", + " # Generic accuracy function\n", + " is_equal = tf.equal(y_pred, y)\n", + " return tf.reduce_mean(tf.cast(is_equal, tf.float32))\n", + "\n", + "x_test, y_test = tfds.load(\"mnist\", split=['test'], batch_size=-1, as_supervised=True)[0]\n", + "test_classes = mlp_loaded(x_test)\n", + "test_acc = accuracy_score(test_classes, y_test)\n", + "print(f\"Test Accuracy: {test_acc:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j5t9vgv_ciQ_" + }, + "source": [ + "The model does a great job of classifying handwritten digits in the training dataset and also generalizes well to unseen data. Now, examine the model's class-wise accuracy to ensure good performance for each digit. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UD8YiC1Vfeyp" + }, + "outputs": [], + "source": [ + "print(\"Accuracy breakdown by digit:\")\n", + "print(\"---------------------------\")\n", + "label_accs = {}\n", + "for label in range(10):\n", + " label_ind = (y_test == label)\n", + " # extract predictions for specific true label\n", + " pred_label = test_classes[label_ind]\n", + " labels = y_test[label_ind]\n", + " # compute class-wise accuracy\n", + " label_accs[accuracy_score(pred_label, labels).numpy()] = label\n", + "for key in sorted(label_accs):\n", + " print(f\"Digit {label_accs[key]}: {key:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rcykuJFhdGb0" + }, + "source": [ + "It looks like the model struggles with some digits a little more than others which is quite common in many multiclass classification problems. 
As a final exercise, plot a confusion matrix of the model's predictions and its corresponding true labels to gather more class-level insights. Sklearn and seaborn have functions for generating and visualizing confusion matrices. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JqCaqPwwh1tN" + }, + "outputs": [], + "source": [ + "import sklearn.metrics as sk_metrics\n", + "\n", + "def show_confusion_matrix(test_labels, test_classes):\n", + " # Compute confusion matrix and normalize\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(test_labels.numpy(), \n", + " test_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(10)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(\"Confusion matrix\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "show_confusion_matrix(y_test, test_classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JT-WA7GVda6d" + }, + "source": [ + "Class-level insights can help identify reasons for misclassifications and improve model performance in future training cycles." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a multiclass classification problem with an [MLP](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/softmax). Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability\n", + "- Initialization schemes can help prevent model parameters from vanishing or exploding during training.\n", + "- Overfitting is another common problem for neural networks, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "mlp_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/optimizers_core.ipynb b/site/en/guide/core/optimizers_core.ipynb new file mode 100644 index 00000000000..e22f0327419 --- /dev/null +++ b/site/en/guide/core/optimizers_core.ipynb @@ -0,0 +1,612 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Optimizers with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook introduces the process of creating custom optimizers with the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core). Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases. \n", + "\n", + "The [Keras optimizers](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers) module is the recommended optimization toolkit for many general training purposes. It includes a variety of prebuilt optimiziers as well as subclassing functionality for customization. The Keras optimizers are also compatible with custom layers, models, and training loops built with the Core APIs. These prebuilt and customizable optimizers are suitable for most cases, but the Core APIs allow for complete control over the optimization process. For example, techniques such as Sharpness-Aware Minimization (SAM) require the model and optimizer to be coupled, which does not fit the traditional definition of ML optimizers. This guide walks through the process of building custom optimizers from scratch with the Core APIs, giving you the power to have full control over the structure, implementation, and behavior of your optimizers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nBmqYyodNRd_" + }, + "source": [ + "## Optimizers overview\n", + "\n", + "An optimizer is an algorithm used to minimize a loss function with respect to a model's trainable parameters. The most straightforward optimization technique is gradient descent, which iteratively updates a model's parameters by taking a step in the direction of its loss function's steepest descent. Its step size is directly proportional to the size of the gradient, which can be problematic when the gradient is either too large or too small. There are many other gradient-based optimizers such as Adam, Adagrad, and RMSprop that leverage various mathematical properties of gradients for memory efficiency and fast convergence." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d9idwpXCltUl" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)\n", + "# set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UmF5aU3MnwX" + }, + "source": [ + "## Gradient descent\n", + "\n", + "The basic optimizer class should have an initialization method and a function to update a list of variables given a list of gradients. Start by implementing the basic gradient descent optimizer which updates each variable by subtracting its gradient scaled by a learning rate." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MWjmUmeOQFFN" + }, + "outputs": [], + "source": [ + "class GradientDescent(tf.Module):\n", + "\n", + " def __init__(self, learning_rate=1e-3):\n", + " # Initialize parameters\n", + " self.learning_rate = learning_rate\n", + " self.title = f\"Gradient descent optimizer: learning rate={self.learning_rate}\"\n", + "\n", + " def apply_gradients(self, grads, vars):\n", + " # Update variables\n", + " for grad, var in zip(grads, vars):\n", + " var.assign_sub(self.learning_rate*grad)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZSekgBHDRzmp" + }, + "source": [ + "To test this optimizer, create a sample loss function to minimize with respect to a single variable, $x$. Compute its gradient function and solve for its minimizing parameter value:\n", + "\n", + "$$L = 2x^4 + 3x^3 + 2$$\n", + "\n", + "$$\\frac{dL}{dx} = 8x^3 + 9x^2$$\n", + "\n", + "$\\frac{dL}{dx}$ is 0 at $x = 0$, which is a saddle point and at $x = - \\frac{9}{8}$, which is the global minimum. Therefore, the loss function is optimized at $x^\\star = - \\frac{9}{8}$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VCtJaUo6Ry8V" + }, + "outputs": [], + "source": [ + "x_vals = tf.linspace(-2, 2, 201)\n", + "x_vals = tf.cast(x_vals, tf.float32)\n", + "\n", + "def loss(x):\n", + " return 2*(x**4) + 3*(x**3) + 2\n", + "\n", + "def grad(f, x):\n", + " with tf.GradientTape() as tape:\n", + " tape.watch(x)\n", + " result = f(x)\n", + " return tape.gradient(result, x)\n", + "\n", + "plt.plot(x_vals, loss(x_vals), c='k', label = \"Loss function\")\n", + "plt.plot(x_vals, grad(loss, x_vals), c='tab:blue', label = \"Gradient function\")\n", + "plt.plot(0, loss(0), marker=\"o\", c='g', label = \"Inflection point\")\n", + "plt.plot(-9/8, loss(-9/8), marker=\"o\", c='r', label = \"Global minimum\")\n", + "plt.legend()\n", + "plt.ylim(0,5)\n", + "plt.xlabel(\"x\")\n", + "plt.ylabel(\"loss\")\n", + "plt.title(\"Sample loss function and gradient\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fLlIBJ9yuwhE" + }, + "source": [ + "Write a function to test the convergence of an optimizer with a single variable loss function. Assume that convergence has been achieved when the updated parameter's value at timestep $t$ is the same as its value held at timestep $t-1$. Terminate the test after a set number of iterations and also keep track of any exploding gradients during the process. In order to truly challenge the optimization algorithm, initialize the parameter poorly. In the above example, $x = 2$ is a good choice since it involves an steep gradient and also leads into an inflection point." 
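To see why $x = 2$ is a challenging starting point (a quick manual check reusing the `loss` and `grad` functions defined above, not part of the original tutorial), note that the gradient there is already 100, so a learning rate of 1e-1 overshoots the minimum on the very first step:

```python
# Hypothetical manual check: two plain gradient descent steps starting from x = 2.
x = tf.Variable(2.)
lr = 1e-1
for step in range(2):
  g = grad(loss, x)   # first step: 8*(2**3) + 9*(2**2) = 100
  x.assign_sub(lr * g)
  print(f"step {step}: gradient = {g.numpy():.1f}, new x = {x.numpy():.1f}")
# x jumps from 2.0 to -8.0, where the gradient is -3520, so the next step
# overshoots even further -- the exploding-gradient case the test below checks for.
```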
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SLQTc41ouv0F" + }, + "outputs": [], + "source": [ + "def convergence_test(optimizer, loss_fn, grad_fn=grad, init_val=2., max_iters=2000):\n", + " # Function for optimizer convergence test\n", + " print(optimizer.title)\n", + " print(\"-------------------------------\")\n", + " # Initializing variables and structures\n", + " x_star = tf.Variable(init_val)\n", + " param_path = []\n", + " converged = False\n", + "\n", + " for iter in range(1, max_iters + 1):\n", + " x_grad = grad_fn(loss_fn, x_star)\n", + "\n", + " # Case for exploding gradient\n", + " if tf.math.is_nan(x_grad):\n", + " print(f\"Gradient exploded at iteration {iter}\\n\")\n", + " return []\n", + "\n", + " # Updating the variable and storing its old-version\n", + " x_old = x_star.numpy()\n", + " optimizer.apply_gradients([x_grad], [x_star])\n", + " param_path.append(x_star.numpy())\n", + "\n", + " # Checking for convergence\n", + " if x_star == x_old:\n", + " print(f\"Converged in {iter} iterations\\n\")\n", + " converged = True\n", + " break\n", + " \n", + " # Print early termination message\n", + " if not converged:\n", + " print(f\"Exceeded maximum of {max_iters} iterations. Test terminated.\\n\")\n", + " return param_path" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vK-7_TsmyAgI" + }, + "source": [ + "Test the convergence of the gradient descent optimizer for the following learning rates: 1e-3, 1e-2, 1e-1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lWRn8c91mqB0" + }, + "outputs": [], + "source": [ + "param_map_gd = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + " param_map_gd[learning_rate] = (convergence_test(\n", + " GradientDescent(learning_rate=learning_rate), loss_fn=loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TydrGHF5y6iI" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "piffzGHI_u5G" + }, + "outputs": [], + "source": [ + "def viz_paths(param_map, x_vals, loss_fn, title, max_iters=2000):\n", + " # Creating a controur plot of the loss function\n", + " t_vals = tf.range(1., max_iters + 100.)\n", + " t_grid, x_grid = tf.meshgrid(t_vals, x_vals)\n", + " loss_grid = tf.math.log(loss_fn(x_grid))\n", + " plt.pcolormesh(t_vals, x_vals, loss_grid, vmin=0, shading='nearest')\n", + " colors = ['r', 'w', 'c']\n", + " # Plotting the parameter paths over the contour plot\n", + " for i, learning_rate in enumerate(param_map):\n", + " param_path = param_map[learning_rate]\n", + " if len(param_path) > 0:\n", + " x_star = param_path[-1]\n", + " plt.plot(t_vals[:len(param_path)], param_path, c=colors[i])\n", + " plt.plot(len(param_path), x_star, marker='o', c=colors[i], \n", + " label = f\"x*: learning rate={learning_rate}\")\n", + " plt.xlabel(\"Iterations\")\n", + " plt.ylabel(\"Parameter value\")\n", + " plt.legend()\n", + " plt.title(f\"{title} parameter paths\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ssyj2sO4BcNY" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_gd, x_vals, loss, \"Gradient descent\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MmM-5eDLFnmC" + }, + "source": [ + "Gradient descent seems to get stuck at the inflection point when using smaller learning rates. 
Increasing the learning rate can encourage faster movement around the plateau region due to a larger step size; however, this comes at the risk of having exploding gradients in early iterations when the loss function is extremely steep." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m5CDeXN8S1SF" + }, + "source": [ + "## Gradient descent with momentum\n", + "\n", + "Gradient descent with momentum not only uses the gradient to update a variable but also involves the change in position of a variable based on its previous update. The momentum parameter determines the level of influence the update at timestep $t-1$ has on the update at timestep $t$. Accumulating momentum helps to move variables past plateau regions faster than basic gradient descent. The momentum update rule is as follows:\n", + "\n", + "$$\Delta_x^{[t]} = lr \cdot L^\prime(x^{[t-1]}) + p \cdot \Delta_x^{[t-1]}$$\n", + "\n", + "$$x^{[t]} = x^{[t-1]} - \Delta_x^{[t]}$$\n", + "\n", + "where\n", + "\n", + "* $x$: the variable being optimized\n", + "* $\Delta_x$: change in $x$ \n", + "* $lr$: learning rate\n", + "* $L^\prime(x)$: gradient of the loss function with respect to x\n", + "* $p$: momentum parameter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rOBY8Tz4S0dX" + }, + "outputs": [], + "source": [ + "class Momentum(tf.Module):\n", + "\n", + "  def __init__(self, learning_rate=1e-3, momentum=0.7):\n", + "    # Initialize parameters\n", + "    self.learning_rate = learning_rate\n", + "    self.momentum = momentum\n", + "    self.change = 0.\n", + "    self.title = f\"Momentum optimizer: learning rate={self.learning_rate}\"\n", + "\n", + "  def apply_gradients(self, grads, vars):\n", + "    # Update variables \n", + "    for grad, var in zip(grads, vars):\n", + "      curr_change = self.learning_rate*grad + self.momentum*self.change\n", + "      var.assign_sub(curr_change)\n", + "      self.change = curr_change" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t_nDu38gW6Fu" + }, + "source": [ + "Test the convergence of the momentum optimizer for the following learning rates: 1e-3, 1e-2, 1e-1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tA6oQL-sW2xg" + }, + "outputs": [], + "source": [ + "param_map_mtm = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + "  param_map_mtm[learning_rate] = (convergence_test(\n", + "      Momentum(learning_rate=learning_rate),\n", + "      loss_fn=loss, grad_fn=grad))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wz_LV0EPYE6k" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qbW1eEKaX3T9" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_mtm, x_vals, loss, \"Momentum\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4bEFnhPRTBXh" + }, + "source": [ + "## Adaptive moment estimation (Adam)\n", + "\n", + "The Adaptive Moment Estimation (Adam) algorithm is an efficient and highly generalizable optimization technique that leverages two key gradient descent methodologies: momentum and root mean square propagation (RMSP). Momentum helps accelerate gradient descent by using the first moment (sum of gradients) along with a decay parameter. RMSP is similar; however, it leverages the second moment (sum of gradients squared). 
\n", + "\n", + "The Adam algorithm combines both the first and second moment to provide a more generalizable update rule. The sign of a variable, $x$, can be determined by computing $\\frac{x}{\\sqrt{x^2}}$. The Adam optimizer uses this fact to calculate an update step which is effectively a smoothed sign. Instead of calculating $\\frac{x}{\\sqrt{x^2}}$, the optimizer calculates a smoothed version of $x$ (first moment) and $x^2$ (second moment) for each variable update. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WjgyqRiZ7XhA" + }, + "source": [ + "**Adam algorithm**\n", + "\n", + "$\\beta_1 \\gets 0.9 \\; \\triangleright \\text{literature value}$\n", + "\n", + "$\\beta_2 \\gets 0.999 \\; \\triangleright \\text{literature value}$\n", + "\n", + "$lr \\gets \\text{1e-3} \\; \\triangleright \\text{configurable learning rate}$\n", + "\n", + "$\\epsilon \\gets \\text{1e-7} \\; \\triangleright \\text{prevents divide by 0 error}$\n", + "\n", + "$V_{dv} \\gets \\vec {\\underset{n\\times1}{0}} \\;\\triangleright \\text{stores momentum updates for each variable}$\n", + "\n", + "$S_{dv} \\gets \\vec {\\underset{n\\times1}{0}} \\; \\triangleright \\text{stores RMSP updates for each variable}$\n", + "\n", + "$t \\gets 1$\n", + "\n", + "$\\text{On iteration } t:$\n", + "\n", + "$\\;\\;\\;\\; \\text{For} (\\frac{dL}{dv}, v) \\text{ in gradient variable pairs}:$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; V_{dv\\_i} = \\beta_1V_{dv\\_i} + (1 - \\beta_1)\\frac{dL}{dv} \\; \\triangleright \\text{momentum update}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; S_{dv\\_i} = \\beta_2V_{dv\\_i} + (1 - \\beta_2)(\\frac{dL}{dv})^2 \\; \\triangleright \\text{RMSP update}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; v_{dv}^{bc} = \\frac{V_{dv\\_i}}{(1-\\beta_1)^t} \\; \\triangleright \\text{momentum bias correction}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; s_{dv}^{bc} = \\frac{S_{dv\\_i}}{(1-\\beta_2)^t} \\; \\triangleright \\text{RMSP bias correction}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; v = v - lr\\frac{v_{dv}^{bc}}{\\sqrt{s_{dv}^{bc}} + \\epsilon} \\; \\triangleright \\text{parameter update}$\n", + "\n", + "$\\;\\;\\;\\;\\;\\;\\;\\; t = t + 1$\n", + "\n", + "**End of algorithm**\n", + "\n", + "Given that $V_{dv}$ and $S_{dv}$ are initialized to 0 and that $\\beta_1$ and $\\beta_2$ are close to 1, the momentum and RMSP updates are naturally biased towards 0; therefore, the variables can benefit from bias correction. Bias correction also helps to control the osccilation of weights as they approach the global minimum." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hm5vffRJRsEc" + }, + "outputs": [], + "source": [ + "class Adam(tf.Module):\n", + " \n", + " def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize the Adam parameters\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " self.title = f\"Adam: learning rate={self.learning_rate}\"\n", + " self.built = False\n", + "\n", + " def apply_gradients(self, grads, vars):\n", + " # Set up moment and RMSprop slots for each variable on the first call\n", + " if not self.built:\n", + " for var in vars:\n", + " v = tf.Variable(tf.zeros(shape=var.shape))\n", + " s = tf.Variable(tf.zeros(shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + " self.built = True\n", + " # Perform Adam updates\n", + " for i, (d_var, var) in enumerate(zip(grads, vars)):\n", + " # Moment calculation\n", + " self.v_dvar[i] = self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var\n", + " # RMSprop calculation\n", + " self.s_dvar[i] = self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var)\n", + " # Bias correction\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " # Update model variables\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " # Increment the iteration counter\n", + " self.t += 1." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UWN4Qus7flUO" + }, + "source": [ + "Test the performance of the Adam optimizer with the same learning rates used in the gradient descent examples. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GXHCxtemFBpR" + }, + "outputs": [], + "source": [ + "param_map_adam = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + " param_map_adam[learning_rate] = (convergence_test(\n", + " Adam(learning_rate=learning_rate), loss_fn=loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jgpUcs_xXEjX" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ctvOUmlzFK8s" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_adam, x_vals, loss, \"Adam\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_oGScF8zJcY4" + }, + "source": [ + "In this particular example, the Adam optimizer has slower convergence compared to traditional gradient descent when using small learning rates. However, the algorithm successfully moves past the plateau region and converges to the global minimum when using a larger learning rate. Exploding gradients are no longer an issue due to Adam's dynamic scaling of learning rates when encountering large gradients." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced the basics of writing and comparing optimizers with the [TensorFlow Core APIs](https://www.tensorflow.org/guide/core). Although prebuilt optimizers like Adam are generalizable, they may not always be the best choice for every model or dataset. 
Having fine-grained control over the optimization process can help streamline ML training workflows and improve overall performance. Refer to the following documentation for more examples of custom optimizers:\n", + "\n", + "* This Adam optimizer is used in the [Multilayer perceptrons](https://www.tensorflow.org/guide/core/mlp_core) tutorial and the [Distributed training]()\n", + "* [Model Garden](https://blog.tensorflow.org/2020/03/introducing-model-garden-for-tensorflow-2.html) has a variety of [custom optimizers](https://github.com/tensorflow/models/tree/master/official/modeling/optimization) written with the Core APIs.\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "optimizers_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/quickstart_core.ipynb b/site/en/guide/core/quickstart_core.ipynb new file mode 100644 index 00000000000..70586fd3f0c --- /dev/null +++ b/site/en/guide/core/quickstart_core.ipynb @@ -0,0 +1,591 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rX8mhOLljYeM" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BZSlp3DAjdYf" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3wF5wszaj97Y" + }, + "source": [ + "# Quickstart for the TensorFlow Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DUNzJc4jTj6G" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "04QgGZc9bF5D" + }, + "source": [ + "This quickstart tutorial demonstrates how you can use the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build and train a multiple linear regression model that predicts fuel efficiency. It uses the [Auto MPG](https://archive.ics.uci.edu/ml/datasets/auto+mpg) dataset which contains fuel efficiency data for late-1970s and early 1980s automobiles.\n", + "\n", + "You will follow the typical stages of a machine learning process:\n", + "\n", + "1. Load the dataset.\n", + "2. Build an [input pipeline](../data.ipynb).\n", + "3. Build a multiple [linear regression](https://developers.google.com/machine-learning/glossary#linear-regression) model.\n", + "4. Evaluate the performance of the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nnrWf3PCEzXL" + }, + "source": [ + "## Setup\n", + "\n", + "Import TensorFlow and other necessary libraries to get started:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0trJmd6DjqBZ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "print(\"TensorFlow version:\", tf.__version__)\n", + "# Set a random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7NAbSZiaoJ4z" + }, + "source": [ + "## Load and preprocess the dataset\n", + "\n", + "Next, you need to load and preprocess the [Auto MPG dataset](https://archive.ics.uci.edu/ml/datasets/auto+mpg) from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/). This dataset uses a variety of quantitative and categorical features such as cylinders, displacement, horsepower and weight to predict the fuel efficiencies of automobiles in the late-1970s and early 1980s.\n", + "\n", + "The dataset contains a few unknown values. Make sure to drop any missing values with `pandas.DataFrame.dropna`, and convert the dataset to a `tf.float32` tensor type with the `tf.convert_to_tensor` and `tf.cast` functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HglhDsUfrJ98" + }, + "outputs": [], + "source": [ + "url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'\n", + "column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',\n", + " 'Acceleration', 'Model Year', 'Origin']\n", + "\n", + "dataset = pd.read_csv(url, names=column_names, na_values='?', comment='\\t',\n", + " sep=' ', skipinitialspace=True)\n", + "\n", + "dataset = dataset.dropna()\n", + "dataset_tf = tf.convert_to_tensor(dataset, dtype=tf.float32)\n", + "dataset.tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vgoDL3hYesB" + }, + "source": [ + "Next, split the dataset into training and test sets. Make sure to shuffle the dataset with `tf.random.shuffle` to avoid biased splits." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0mJU4kt6YiAp" + }, + "outputs": [], + "source": [ + "dataset_shuffled = tf.random.shuffle(dataset_tf, seed=22)\n", + "train_data, test_data = dataset_shuffled[100:], dataset_shuffled[:100]\n", + "x_train, y_train = train_data[:, 1:], train_data[:, 0]\n", + "x_test, y_test = test_data[:, 1:], test_data[:, 0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bscb2Vsbi3TE" + }, + "source": [ + "Perform basic feature engineering by one-hot-encoding the `\"Origin\"` feature. The `tf.one_hot` function is useful for transforming this categorical column into 3 separate binary columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_B8N9IV1i6IV" + }, + "outputs": [], + "source": [ + "def onehot_origin(x):\n", + " origin = tf.cast(x[:, -1], tf.int32)\n", + " # Use `origin - 1` to account for 1-indexed feature\n", + " origin_oh = tf.one_hot(origin - 1, 3)\n", + " x_ohe = tf.concat([x[:, :-1], origin_oh], axis = 1)\n", + " return x_ohe\n", + "\n", + "x_train_ohe, x_test_ohe = onehot_origin(x_train), onehot_origin(x_test)\n", + "x_train_ohe.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qnoCDzzedite" + }, + "source": [ + "This example shows a multiple regression problem with predictors or features on vastly different scales. Therefore, it is beneficial to standardize the data so that each feature has zero mean and unit variance. Use the `tf.reduce_mean` and `tf.math.reduce_std` functions for standardization. The regression model's prediction can then be unstandardized to obtain its value in terms of the original units." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dJJFdvqydhyp" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.math.reduce_mean(x, axis=0)\n", + " self.std = tf.math.reduce_std(x, axis=0)\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5BONV6fYYwZb" + }, + "outputs": [], + "source": [ + "norm_x = Normalize(x_train_ohe)\n", + "norm_y = Normalize(y_train)\n", + "x_train_norm, y_train_norm = norm_x.norm(x_train_ohe), norm_y.norm(y_train)\n", + "x_test_norm, y_test_norm = norm_x.norm(x_test_ohe), norm_y.norm(y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BPZ68wASog_I" + }, + "source": [ + "## Build a machine learning model\n", + "\n", + "Build a linear regression model with the TensorFlow Core APIs. The equation for multiple linear regression is as follows:\n", + "\n", + "$${\\mathrm{Y}} = {\\mathrm{X}}w + b$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: feature matrix\n", + "* $\\underset{n\\times 1}w$: weight vector\n", + "* $b$: bias\n", + "\n", + "By using the `@tf.function` decorator, the corresponding Python code is traced to generate a callable TensorFlow graph. This approach is beneficial for saving and loading the model after training. It can also provide a performance boost for models with many layers and complex operations. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h3IKyzTCDNGo" + }, + "outputs": [], + "source": [ + "class LinearRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + "\n", + " @tf.function\n", + " def __call__(self, x):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weight vector and bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1])\n", + " rand_b = tf.random.uniform(shape=[])\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " y = tf.add(tf.matmul(x, self.w), self.b)\n", + " return tf.squeeze(y, axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l2hiez2eIUz8" + }, + "source": [ + "For each example, the model returns a prediction for the input automobile's MPG by computing the weighted sum of its features plus a bias term. This prediction can then be unstandardized to obtain its value in terms of the original units." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OeOrNdnkEEcR" + }, + "outputs": [], + "source": [ + "lin_reg = LinearRegression()\n", + "prediction = lin_reg(x_train_norm[:1])\n", + "prediction_unnorm = norm_y.unnorm(prediction)\n", + "prediction_unnorm.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FIHANxNSvWr9" + }, + "source": [ + "## Define a loss function\n", + "\n", + "Now, define a loss function to evaluate the model's performance during the training process.\n", + "\n", + "Since regression problems deal with continuous outputs, the mean squared error (MSE) is an ideal choice for the loss function. The MSE is defined by the following equation:\n", + "\n", + "$$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$$\n", + "\n", + "where\n", + "\n", + "* $\\hat{y}$: vector of predictions\n", + "* $y$: vector of true targets\n", + "\n", + "The goal of this regression problem is to find the optimal weight vector, $w$, and bias, $b$, that minimizes the MSE loss function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8tYNVUkmw35s" + }, + "outputs": [], + "source": [ + "def mse_loss(y_pred, y):\n", + " return tf.reduce_mean(tf.square(y_pred - y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "htI-7aJPqclK" + }, + "source": [ + "## Train and evaluate your model\n", + "\n", + "Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling. The API enables you to build complex input pipelines from simple, reusable pieces. Learn more about building TensorFlow input pipelines in [this guide](https://www.tensorflow.org/guide/data)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kxST2w_Nq0C5" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train_norm))\n", + "train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_norm, y_test_norm))\n", + "test_dataset = test_dataset.shuffle(buffer_size=x_test.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C9haUW8Yq3xD" + }, + "source": [ + "Next, write a training loop to iteratively update your model's parameters by making use of the MSE loss function and its gradients with respect to the input parameters.\n", + "\n", + "This iterative method is referred to as [gradient descent](https://developers.google.com/machine-learning/glossary#gradient-descent). At each iteration, the model's parameters are updated by taking a step in the opposite direction of their computed gradients. The size of this step is determined by the learning rate, which is a configurable hyperparameter. Recall that the gradient of a function indicates the direction of its steepest ascent; therefore, taking a step in the opposite direction indicates the direction of steepest descent, which ultimately helps to minimize the MSE loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y7suUbJXVLqP" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 100\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "\n", + "# Format training loop\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_losses_test = [], []\n", + "\n", + " # Iterate through the training data\n", + " for x_batch, y_batch in train_dataset:\n", + " with tf.GradientTape() as tape:\n", + " y_pred_batch = lin_reg(x_batch)\n", + " batch_loss = mse_loss(y_pred_batch, y_batch)\n", + " # Update parameters with respect to the gradient calculations\n", + " grads = tape.gradient(batch_loss, lin_reg.variables)\n", + " for g,v in zip(grads, lin_reg.variables):\n", + " v.assign_sub(learning_rate * g)\n", + " # Keep track of batch-level training performance \n", + " batch_losses_train.append(batch_loss)\n", + " \n", + " # Iterate through the testing data\n", + " for x_batch, y_batch in test_dataset:\n", + " y_pred_batch = lin_reg(x_batch)\n", + " batch_loss = mse_loss(y_pred_batch, y_batch)\n", + " # Keep track of batch-level testing performance \n", + " batch_losses_test.append(batch_loss)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss = tf.reduce_mean(batch_losses_train)\n", + " test_loss = tf.reduce_mean(batch_losses_test)\n", + " train_losses.append(train_loss)\n", + " test_losses.append(test_loss)\n", + " if epoch % 10 == 0:\n", + " print(f'Mean squared error for step {epoch}: {train_loss.numpy():0.3f}')\n", + "\n", + "# Output final losses\n", + "print(f\"\\nFinal train loss: {train_loss:0.3f}\")\n", + "print(f\"Final test loss: {test_loss:0.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4mDAAPFqVVgn" + }, + "source": [ + "Plot the changes in MSE loss over time. 
Calculating performance metrics on a designated [validation set](https://developers.google.com/machine-learning/glossary#validation-set) or [test set](https://developers.google.com/machine-learning/glossary#test-set) ensures the model does not overfit to the training dataset and can generalize well to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F7dTAzgHDUh7" + }, + "outputs": [], + "source": [ + "matplotlib.rcParams['figure.figsize'] = [9, 6]\n", + "\n", + "plt.plot(range(epochs), train_losses, label = \"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label = \"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Mean squared error loss\")\n", + "plt.legend()\n", + "plt.title(\"MSE loss vs training iterations\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aj8NrlzlJqDG" + }, + "source": [ + "It seems like the model does a good job of fitting the training data while also generalizing well to the unseen test data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AUNIPubuPYDR" + }, + "source": [ + "## Save and load the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Feature extraction \n", + "- Normalization \n", + "- Prediction\n", + "- Unnormalization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g-uOrGa9ZehG" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, extract_features, norm_x, norm_y):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.extract_features = extract_features\n", + " self.norm_x = norm_x\n", + " self.norm_y = norm_y\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x = self.extract_features(x)\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x)\n", + " y = self.norm_y.unnorm(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YPYYLQ8EZiU8" + }, + "outputs": [], + "source": [ + "lin_reg_export = ExportModule(model=lin_reg,\n", + " extract_features=onehot_origin,\n", + " norm_x=norm_x,\n", + " norm_y=norm_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6v8xi06XZWiC" + }, + "source": [ + "If you want to save the model at its current state, use the `tf.saved_model.save` function. To load a saved model for making predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1IvMoHbptht" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import os\n", + "\n", + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'lin_reg_export')\n", + "tf.saved_model.save(lin_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYb6DrEH0GMv" + }, + "outputs": [], + "source": [ + "lin_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = lin_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-47O6_GLdRuT" + }, + "source": [ + "## Conclusion\n", + "\n", + "Congratulations! 
You have trained a regression model using the TensorFlow Core low-level APIs.\n", + "\n", + "For more examples of using TensorFlow Core APIs, check out the following guides:\n", + "* [Logistic regression](./logistic_regression_core.ipynb) for binary classification\n", + "* [Multi-layer perceptrons](./mlp_core.ipynb) for hand-written digit recognition\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "rX8mhOLljYeM" + ], + "name": "quickstart_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/create_op.md b/site/en/guide/create_op.md index 90d7fb1ddff..fa4f573fa32 100644 --- a/site/en/guide/create_op.md +++ b/site/en/guide/create_op.md @@ -47,7 +47,7 @@ To incorporate your custom op you'll need to: test the op in C++. If you define gradients, you can verify them with the Python `tf.test.compute_gradient_error`. See - [`relu_op_test.py`](https://www.tensorflow.org/code/tensorflow/python/kernel_tests/relu_op_test.py) as + [`relu_op_test.py`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/kernel_tests/nn_ops/relu_op_test.py) as an example that tests the forward functions of Relu-like operators and their gradients. @@ -55,8 +55,8 @@ To incorporate your custom op you'll need to: * Some familiarity with C++. * Must have installed the - [TensorFlow binary](../../install), or must have - [downloaded TensorFlow source](../../install/source.md), + [TensorFlow binary](https://www.tensorflow.org/install), or must have + [downloaded TensorFlow source](https://www.tensorflow.org/install/source), and be able to build it. ## Define the op interface @@ -152,17 +152,17 @@ REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp); > Important: Instances of your OpKernel may be accessed concurrently. > Your `Compute` method must be thread-safe. Guard any access to class > members with a mutex. Or better yet, don't share state via class members! -> Consider using a [`ResourceMgr`](https://www.tensorflow.org/code/tensorflow/core/framework/resource_mgr.h) +> Consider using a [`ResourceMgr`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/resource_mgr.h) > to keep track of op state. ### Multi-threaded CPU kernels To write a multi-threaded CPU kernel, the Shard function in -[`work_sharder.h`](https://www.tensorflow.org/code/tensorflow/core/util/work_sharder.h) +[`work_sharder.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/work_sharder.h) can be used. This function shards a computation function across the threads configured to be used for intra-op threading (see intra_op_parallelism_threads in -[`config.proto`](https://www.tensorflow.org/code/tensorflow/core/protobuf/config.proto)). +[`config.proto`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto)). ### GPU kernels @@ -354,18 +354,19 @@ to compile your op into a dynamic library. 
```bash TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') ) -g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 +g++ -std=c++14 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 ``` On macOS, the additional flag "-undefined dynamic_lookup" is required when building the `.so` file. -> Note on `gcc` version `>=5`: gcc uses the new C++ -> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. The binary pip -> packages available on the TensorFlow website are built with `gcc4` that uses -> the older ABI. If you compile your op library with `gcc>=5`, add -> `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to make the library -> compatible with the older abi. +> Note on `gcc` version `>=5`: gcc uses the new C++ +> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. +> TensorFlow 2.8 and earlier were built with `gcc4` that uses the older ABI. If +> you are using these versions of TensorFlow and are trying to compile your op +> library with `gcc>=5`, add `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to +> make the library compatible with the older ABI. TensorFlow 2.9+ packages are +> compatible with the newer ABI by default. ### Compile the op using bazel (TensorFlow source installation) @@ -518,16 +519,16 @@ This asserts that the input is a vector, and returns having set the * The `context`, which can either be an `OpKernelContext` or `OpKernelConstruction` pointer (see - [`tensorflow/core/framework/op_kernel.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)), + [`tensorflow/core/framework/op_kernel.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_kernel.h)), for its `SetStatus()` method. * The condition. For example, there are functions for validating the shape of a tensor in - [`tensorflow/core/framework/tensor_shape.h`](https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.h) + [`tensorflow/core/framework/tensor_shape.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.h) * The error itself, which is represented by a `Status` object, see - [`tensorflow/core/lib/core/status.h`](https://www.tensorflow.org/code/tensorflow/core/lib/core/status.h). A + [`tensorflow/core/platform/status.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/status.h). A `Status` has both a type (frequently `InvalidArgument`, but see the list of types) and a message. Functions for constructing an error may be found in - [`tensorflow/core/lib/core/errors.h`][validation-macros]. + [`tensorflow/core/platform/errors.h`][validation-macros]. Alternatively, if you want to test whether a `Status` object returned from some function is an error, and if so return it, use @@ -667,7 +668,7 @@ There are shortcuts for common type constraints: The specific lists of types allowed by these are defined by the functions (like `NumberTypes()`) in -[`tensorflow/core/framework/types.h`](https://www.tensorflow.org/code/tensorflow/core/framework/types.h). +[`tensorflow/core/framework/types.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.h). 
In this example the attr `t` must be one of the numeric types: ```c++ @@ -1225,7 +1226,7 @@ There are several ways to preserve backwards-compatibility. type into a list of varying types). The full list of safe and unsafe changes can be found in -[`tensorflow/core/framework/op_compatibility_test.cc`](https://www.tensorflow.org/code/tensorflow/core/framework/op_compatibility_test.cc). +[`tensorflow/core/framework/op_compatibility_test.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_compatibility_test.cc). If you cannot make your change to an operation backwards compatible, then create a new operation with a new name with the new semantics. @@ -1242,16 +1243,16 @@ made when TensorFlow changes major versions, and must conform to the You can implement different OpKernels and register one for CPU and another for GPU, just like you can [register kernels for different types](#polymorphism). There are several examples of kernels with GPU support in -[`tensorflow/core/kernels/`](https://www.tensorflow.org/code/tensorflow/core/kernels/). +[`tensorflow/core/kernels/`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/). Notice some kernels have a CPU version in a `.cc` file, a GPU version in a file ending in `_gpu.cu.cc`, and some code shared in common in a `.h` file. For example, the `tf.pad` has everything but the GPU kernel in [`tensorflow/core/kernels/pad_op.cc`][pad_op]. The GPU kernel is in -[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op_gpu.cu.cc), +[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op_gpu.cu.cc), and the shared code is a templated class defined in -[`tensorflow/core/kernels/pad_op.h`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.h). +[`tensorflow/core/kernels/pad_op.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op.h). We organize the code this way for two reasons: it allows you to share common code among the CPU and GPU implementations, and it puts the GPU implementation into a separate file so that it can be compiled only by the GPU compiler. @@ -1272,23 +1273,23 @@ kept on the CPU, add a `HostMemory()` call to the kernel registration, e.g.: #### Compiling the kernel for the GPU device Look at -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) for an example that uses a CUDA kernel to implement an op. The `tf_custom_op_library` accepts a `gpu_srcs` argument in which the list of source files containing the CUDA kernels (`*.cu.cc` files) can be specified. For use with a binary installation of TensorFlow, the CUDA kernels have to be compiled with NVIDIA's `nvcc` compiler. 
Here is the sequence of commands you can use to compile the -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) and -[cuda_op_kernel.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cc) +[cuda_op_kernel.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cc) into a single dynamically loadable library: ```bash -nvcc -std=c++11 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \ +nvcc -std=c++14 -c -o cuda_op_kernel.cu.o cuda_op_kernel.cu.cc \ ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -g++ -std=c++11 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \ +g++ -std=c++14 -shared -o cuda_op_kernel.so cuda_op_kernel.cc \ cuda_op_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]} ``` @@ -1379,6 +1380,13 @@ Note that at the time the gradient function is called, only the data flow graph of ops is available, not the tensor data itself. Thus, all computation must be performed using other tensorflow ops, to be run at graph execution time. +Add type hints when registering the custom gradient for an op type to make the +code more readable, debuggable, easier to maintain, and more robust through data +validation. For example, when taking an `op` as a parameter in a function, +specify that the gradient function will take an +tf.Operation +as its parameter type. + ### Shape functions in C++ The TensorFlow API has a feature called "shape inference" that provides @@ -1404,7 +1412,7 @@ be set to the first input's shape. If the output is selected by its index as in There are a number of common shape functions that apply to many ops, such as `shape_inference::UnchangedShape` which can be -found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows: +found in [common_shape_fns.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/common_shape_fns.h) and used as follows: ```c++ REGISTER_OP("ZeroOut") @@ -1451,7 +1459,7 @@ provides access to the attributes of the op). Since shape inference is an optional feature, and the shapes of tensors may vary dynamically, shape functions must be robust to incomplete shape information for -any of the inputs. The `Merge` method in [`InferenceContext`](https://www.tensorflow.org/code/tensorflow/core/framework/shape_inference.h) +any of the inputs. The `Merge` method in [`InferenceContext`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/shape_inference.h) allows the caller to assert that two shapes are the same, even if either or both of them do not have complete information. Shape functions are defined for all of the core TensorFlow ops and provide many different usage examples. @@ -1476,7 +1484,7 @@ If you have a complicated shape function, you should consider adding a test for validating that various input shape combinations produce the expected output shape combinations. You can see examples of how to write these tests in some our -[core ops tests](https://www.tensorflow.org/code/tensorflow/core/ops/array_ops_test.cc). +[core ops tests](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/array_ops_test.cc). 
(The syntax of `INFER_OK` and `INFER_ERROR` are a little cryptic, but try to be compact in representing input and output shape specifications in tests. For now, see the surrounding comments in those tests to get a sense of the shape @@ -1489,20 +1497,20 @@ To build a `pip` package for your op, see the guide shows how to build custom ops from the TensorFlow pip package instead of building TensorFlow from source. -[core-array_ops]:https://www.tensorflow.org/code/tensorflow/core/ops/array_ops.cc -[python-user_ops]:https://www.tensorflow.org/code/tensorflow/python/user_ops/user_ops.py -[tf-kernels]:https://www.tensorflow.org/code/tensorflow/core/kernels/ -[user_ops]:https://www.tensorflow.org/code/tensorflow/core/user_ops/ -[pad_op]:https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.cc -[standard_ops-py]:https://www.tensorflow.org/code/tensorflow/python/ops/standard_ops.py -[standard_ops-cc]:https://www.tensorflow.org/code/tensorflow/cc/ops/standard_ops.h -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD -[validation-macros]:https://www.tensorflow.org/code/tensorflow/core/lib/core/errors.h -[op_def_builder]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.h -[register_types]:https://www.tensorflow.org/code/tensorflow/core/framework/register_types.h -[FinalizeAttr]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.cc -[DataTypeString]:https://www.tensorflow.org/code/tensorflow/core/framework/types.cc -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD -[types-proto]:https://www.tensorflow.org/code/tensorflow/core/framework/types.proto -[TensorShapeProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.proto -[TensorProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor.proto +[core-array_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/array_ops.cc +[python-user_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/user_ops/user_ops.py +[tf-kernels]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/ +[user_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/user_ops/ +[pad_op]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op.cc +[standard_ops-py]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/standard_ops.py +[standard_ops-cc]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/ops/standard_ops.h +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD +[validation-macros]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/errors.h +[op_def_builder]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def_builder.h +[register_types]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/register_types.h +[FinalizeAttr]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def_builder.cc +[DataTypeString]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.cc +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD 
+[types-proto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto +[TensorShapeProto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto +[TensorProto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto diff --git a/site/en/guide/data.ipynb b/site/en/guide/data.ipynb index 42146ac9a01..739ef131005 100644 --- a/site/en/guide/data.ipynb +++ b/site/en/guide/data.ipynb @@ -15,7 +15,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "cellView": "form", "id": "llMNufAK7nfK" }, "outputs": [], @@ -139,8 +138,8 @@ "\n", "Once you have a `Dataset` object, you can *transform* it into a new `Dataset` by\n", "chaining method calls on the `tf.data.Dataset` object. For example, you can\n", - "apply per-element transformations such as `Dataset.map()`, and multi-element\n", - "transformations such as `Dataset.batch()`. See the documentation for\n", + "apply per-element transformations such as `Dataset.map`, and multi-element\n", + "transformations such as `Dataset.batch`. Refer to the documentation for\n", "`tf.data.Dataset` for a complete list of transformations.\n", "\n", "The `Dataset` object is a Python iterable. This makes it possible to consume its\n", @@ -238,9 +237,9 @@ "structure of elements include `tuple`, `dict`, `NamedTuple`, and\n", "`OrderedDict`. In particular, `list` is not a valid construct for\n", "expressing the structure of dataset elements. This is because\n", - "early tf.data users felt strongly about `list` inputs (e.g. passed\n", + "early `tf.data` users felt strongly about `list` inputs (for example, when passed\n", "to `tf.data.Dataset.from_tensors`) being automatically packed as\n", - "tensors and `list` outputs (e.g. return values of user-defined\n", + "tensors and `list` outputs (for example, return values of user-defined\n", "functions) being coerced into a `tuple`. As a consequence, if you\n", "would like a `list` input to be treated as a structure, you need\n", "to convert it into `tuple` and if you would like a `list` output\n", @@ -328,7 +327,7 @@ }, "source": [ "The `Dataset` transformations support datasets of any structure. When using the\n", - "`Dataset.map()`, and `Dataset.filter()` transformations,\n", + "`Dataset.map`, and `Dataset.filter` transformations,\n", "which apply a function to each element, the element structure determines the\n", "arguments of the function:" ] @@ -416,11 +415,11 @@ "source": [ "### Consuming NumPy arrays\n", "\n", - "See [Loading NumPy arrays](../tutorials/load_data/numpy.ipynb) for more examples.\n", + "Refer to the [Loading NumPy arrays](../tutorials/load_data/numpy.ipynb) tutorial for more examples.\n", "\n", "If all of your input data fits in memory, the simplest way to create a `Dataset`\n", "from them is to convert them to `tf.Tensor` objects and use\n", - "`Dataset.from_tensor_slices()`." + "`Dataset.from_tensor_slices`." ] }, { @@ -472,7 +471,7 @@ "\n", "Another common data source that can easily be ingested as a `tf.data.Dataset` is the python generator.\n", "\n", - "Caution: While this is a convienient approach it has limited portability and scalibility. It must run in the same python process that created the generator, and is still subject to the Python [GIL](https://en.wikipedia.org/wiki/Global_interpreter_lock)." + "Caution: While this is a convenient approach it has limited portability and scalability. 
It must run in the same python process that created the generator, and is still subject to the Python [GIL](https://en.wikipedia.org/wiki/Global_interpreter_lock)." ] }, { @@ -544,11 +543,11 @@ "id": "wxy9hDMTq1zD" }, "source": [ - "The `output_shapes` argument is not *required* but is highly recomended as many tensorflow operations do not support tensors with unknown rank. If the length of a particular axis is unknown or variable, set it as `None` in the `output_shapes`.\n", + "The `output_shapes` argument is not *required* but is highly recommended as many TensorFlow operations do not support tensors with an unknown rank. If the length of a particular axis is unknown or variable, set it as `None` in the `output_shapes`.\n", "\n", "It's also important to note that the `output_shapes` and `output_types` follow the same nesting rules as other dataset methods.\n", "\n", - "Here is an example generator that demonstrates both aspects, it returns tuples of arrays, where the second array is a vector with unknown length." + "Here is an example generator that demonstrates both aspects: it returns tuples of arrays, where the second array is a vector with unknown length." ] }, { @@ -589,7 +588,7 @@ "source": [ "The first output is an `int32` the second is a `float32`.\n", "\n", - "The first item is a scalar, shape `()`, and the second is a vector of unknown length, shape `(None,)` " + "The first item is a scalar, shape `()`, and the second is a vector of unknown length, shape `(None,)`" ] }, { @@ -601,8 +600,8 @@ "outputs": [], "source": [ "ds_series = tf.data.Dataset.from_generator(\n", - " gen_series, \n", - " output_types=(tf.int32, tf.float32), \n", + " gen_series,\n", + " output_types=(tf.int32, tf.float32),\n", " output_shapes=((), (None,)))\n", "\n", "ds_series" @@ -710,8 +709,8 @@ "outputs": [], "source": [ "ds = tf.data.Dataset.from_generator(\n", - " lambda: img_gen.flow_from_directory(flowers), \n", - " output_types=(tf.float32, tf.float32), \n", + " lambda: img_gen.flow_from_directory(flowers),\n", + " output_types=(tf.float32, tf.float32),\n", " output_shapes=([32,256,256,3], [32,5])\n", ")\n", "\n", @@ -726,7 +725,7 @@ }, "outputs": [], "source": [ - "for images, label in ds.take(1):\n", + "for images, labels in ds.take(1):\n", " print('images.shape: ', images.shape)\n", " print('labels.shape: ', labels.shape)\n" ] @@ -739,7 +738,7 @@ "source": [ "### Consuming TFRecord data\n", "\n", - "See [Loading TFRecords](../tutorials/load_data/tfrecord.ipynb) for an end-to-end example.\n", + "Refer to the [Loading TFRecords](../tutorials/load_data/tfrecord.ipynb) tutorial for an end-to-end example.\n", "\n", "The `tf.data` API supports a variety of file formats so that you can process\n", "large datasets that do not fit in memory. For example, the TFRecord file format\n", @@ -825,7 +824,7 @@ "source": [ "### Consuming text data\n", "\n", - "See [Loading Text](../tutorials/load_data/text.ipynb) for an end to end example.\n", + "Refer to the [Load text](../tutorials/load_data/text.ipynb) tutorial for an end-to-end example.\n", "\n", "Many datasets are distributed as one or more text files. The\n", "`tf.data.TextLineDataset` provides an easy way to extract lines from one or more\n", @@ -916,7 +915,7 @@ "source": [ "By default, a `TextLineDataset` yields *every* line of each file, which may\n", "not be desirable, for example, if the file starts with a header line, or contains comments. These lines can be removed using the `Dataset.skip()` or\n", - "`Dataset.filter()` transformations. 
Here, you skip the first line, then filter to\n", + "`Dataset.filter` transformations. Here, you skip the first line, then filter to\n", "find only survivors." ] }, @@ -985,7 +984,7 @@ "id": "ChDHNi3qbDch" }, "source": [ - "See [Loading CSV Files](../tutorials/load_data/csv.ipynb), and [Loading Pandas DataFrames](../tutorials/load_data/pandas_dataframe.ipynb) for more examples. \n", + "Refer to the [Loading CSV Files](../tutorials/load_data/csv.ipynb) and [Loading Pandas DataFrames](../tutorials/load_data/pandas_dataframe.ipynb) tutorials for more examples.\n", "\n", "The CSV file format is a popular format for storing tabular data in plain text.\n", "\n", @@ -1045,11 +1044,11 @@ "id": "47yippqaHFk6" }, "source": [ - "A more scalable approach is to load from disk as necessary. \n", + "A more scalable approach is to load from disk as necessary.\n", "\n", "The `tf.data` module provides methods to extract records from one or more CSV files that comply with [RFC 4180](https://tools.ietf.org/html/rfc4180).\n", "\n", - "The `experimental.make_csv_dataset` function is the high level interface for reading sets of csv files. It supports column type inference and many other features, like batching and shuffling, to make usage simple." + "The `tf.data.experimental.make_csv_dataset` function is the high-level interface for reading sets of CSV files. It supports column type inference and many other features, like batching and shuffling, to make usage simple." ] }, { @@ -1122,7 +1121,7 @@ "id": "TSVgJJ1HJD6M" }, "source": [ - "There is also a lower-level `experimental.CsvDataset` class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column. " + "There is also a lower-level `experimental.CsvDataset` class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column." ] }, { @@ -1133,7 +1132,7 @@ }, "outputs": [], "source": [ - "titanic_types = [tf.int32, tf.string, tf.float32, tf.int32, tf.int32, tf.float32, tf.string, tf.string, tf.string, tf.string] \n", + "titanic_types = [tf.int32, tf.string, tf.float32, tf.int32, tf.int32, tf.float32, tf.string, tf.string, tf.string, tf.string]\n", "dataset = tf.data.experimental.CsvDataset(titanic_file, titanic_types , header=True)\n", "\n", "for line in dataset.take(10):\n", @@ -1386,7 +1385,7 @@ "The simplest form of batching stacks `n` consecutive elements of a dataset into\n", "a single element. The `Dataset.batch()` transformation does exactly this, with\n", "the same constraints as the `tf.stack()` operator, applied to each component\n", - "of the elements: i.e. for each component *i*, all elements must have a tensor\n", + "of the elements: i.e., for each component *i*, all elements must have a tensor\n", "of the exact same shape." ] }, @@ -1457,10 +1456,10 @@ "### Batching tensors with padding\n", "\n", "The above recipe works for tensors that all have the same size. However, many\n", - "models (e.g. sequence models) work with input data that can have varying size\n", - "(e.g. sequences of different lengths). To handle this case, the\n", + "models (including sequence models) work with input data that can have varying size\n", + "(for example, sequences of different lengths). 
To handle this case, the\n", "`Dataset.padded_batch` transformation enables you to batch tensors of\n", - "different shape by specifying one or more dimensions in which they may be\n", + "different shapes by specifying one or more dimensions in which they may be\n", "padded." ] }, @@ -1604,7 +1603,7 @@ "id": "DlEM5f9loSHR" }, "source": [ - "If you would like to perform a custom computation (e.g. to collect statistics) at the end of each epoch then it's simplest to restart the dataset iteration on each epoch:" + "If you would like to perform a custom computation (for example, to collect statistics) at the end of each epoch then it's simplest to restart the dataset iteration on each epoch:" ] }, { @@ -1693,7 +1692,7 @@ "source": [ "As with `Dataset.batch` the order relative to `Dataset.repeat` matters.\n", "\n", - "`Dataset.shuffle` doesn't signal the end of an epoch until the shuffle buffer is empty. So a shuffle placed before a repeat will show every element of one epoch before moving to the next: " + "`Dataset.shuffle` doesn't signal the end of an epoch until the shuffle buffer is empty. So a shuffle placed before a repeat will show every element of one epoch before moving to the next:" ] }, { @@ -1838,7 +1837,7 @@ " label = parts[-2]\n", "\n", " image = tf.io.read_file(filename)\n", - " image = tf.image.decode_jpeg(image)\n", + " image = tf.io.decode_jpeg(image)\n", " image = tf.image.convert_image_dtype(image, tf.float32)\n", " image = tf.image.resize(image, [128, 128])\n", " return image, label" @@ -1906,7 +1905,7 @@ "\n", "For performance reasons, use TensorFlow operations for\n", "preprocessing your data whenever possible. However, it is sometimes useful to\n", - "call external Python libraries when parsing your input data. You can use the `tf.py_function()` operation in a `Dataset.map()` transformation." + "call external Python libraries when parsing your input data. You can use the `tf.py_function` operation in a `Dataset.map` transformation." ] }, { @@ -1915,7 +1914,7 @@ "id": "R2u7CeA67DU8" }, "source": [ - "For example, if you want to apply a random rotation, the `tf.image` module only has `tf.image.rot90`, which is not very useful for image augmentation. \n", + "For example, if you want to apply a random rotation, the `tf.image` module only has `tf.image.rot90`, which is not very useful for image augmentation.\n", "\n", "Note: `tensorflow_addons` has a TensorFlow compatible `rotate` in `tensorflow_addons.image.rotate`.\n", "\n", @@ -1932,6 +1931,7 @@ "source": [ "import scipy.ndimage as ndimage\n", "\n", + "@tf.py_function(Tout=tf.float32)\n", "def random_rotate_image(image):\n", " image = ndimage.rotate(image, np.random.uniform(-30, 30), reshape=False)\n", " return image" @@ -1969,7 +1969,7 @@ "source": [ "def tf_random_rotate_image(image, label):\n", " im_shape = image.shape\n", - " [image,] = tf.py_function(random_rotate_image, [image], [tf.float32])\n", + " image = random_rotate_image(image)\n", " image.set_shape(im_shape)\n", " return image, label" ] @@ -2124,7 +2124,7 @@ "id": "t0JMgvXEz9y1" }, "source": [ - "For an end to end time series example see: [Time series forecasting](../../tutorials/structured_data/time_series.ipynb)." + "For an end-to-end time series example see: [Time series forecasting](../../tutorials/structured_data/time_series.ipynb)." ] }, { @@ -2155,7 +2155,7 @@ "id": "o6GLGhxgpazJ" }, "source": [ - "Typically, models based on this sort of data will want a contiguous time slice. 
\n", + "Typically, models based on this sort of data will want a contiguous time slice.\n", "\n", "The simplest approach would be to batch the data:" ] @@ -2283,7 +2283,7 @@ "id": "fF6pEdlduq8E" }, "source": [ - "While using `Dataset.batch` works, there are situations where you may need finer control. The `Dataset.window` method gives you complete control, but requires some care: it returns a `Dataset` of `Datasets`. See [Dataset structure](#dataset_structure) for details." + "While using `Dataset.batch` works, there are situations where you may need finer control. The `Dataset.window` method gives you complete control, but requires some care: it returns a `Dataset` of `Datasets`. Go to the [Dataset structure](#dataset_structure) section for details." ] }, { @@ -2328,7 +2328,7 @@ "id": "sgLIwq9Anc34" }, "source": [ - "In nearly all cases, you will want to `.batch` the dataset first:" + "In nearly all cases, you will want to `Dataset.batch` the dataset first:" ] }, { @@ -2422,7 +2422,7 @@ "\n", "When working with a dataset that is very class-imbalanced, you may want to resample the dataset. `tf.data` provides two methods to do this. The credit card fraud dataset is a good example of this sort of problem.\n", "\n", - "Note: See [Imbalanced Data](../tutorials/keras/imbalanced_data.ipynb) for a full tutorial.\n" + "Note: Go to [Classification on imbalanced data](../tutorials/structured_data/imbalanced_data.ipynb) for a full tutorial.\n" ] }, { @@ -2529,7 +2529,7 @@ "id": "ov14SRrQyQE3" }, "source": [ - "One approach to resampling a dataset is to use `sample_from_datasets`. This is more applicable when you have a separate `data.Dataset` for each class.\n", + "One approach to resampling a dataset is to use `sample_from_datasets`. This is more applicable when you have a separate `tf.data.Dataset` for each class.\n", "\n", "Here, just use filter to generate them from the credit card fraud data:" ] @@ -2572,7 +2572,7 @@ "id": "GxLAr-7p0ATX" }, "source": [ - "To use `tf.data.experimental.sample_from_datasets` pass the datasets, and the weight for each:" + "To use `tf.data.Dataset.sample_from_datasets` pass the datasets, and the weight for each:" ] }, { @@ -2583,7 +2583,7 @@ }, "outputs": [], "source": [ - "balanced_ds = tf.data.experimental.sample_from_datasets(\n", + "balanced_ds = tf.data.Dataset.sample_from_datasets(\n", " [negative_ds, positive_ds], [0.5, 0.5]).batch(10)" ] }, @@ -2593,7 +2593,7 @@ "id": "2K4ObOms082B" }, "source": [ - "Now the dataset produces examples of each class with 50/50 probability:" + "Now the dataset produces examples of each class with a 50/50 probability:" ] }, { @@ -2623,15 +2623,15 @@ "id": "kZ9ezkK6irMD" }, "source": [ - "One problem with the above `experimental.sample_from_datasets` approach is that\n", - "it needs a separate `tf.data.Dataset` per class. Using `Dataset.filter`\n", - "works, but results in all the data being loaded twice.\n", + "One problem with the above `Dataset.sample_from_datasets` approach is that\n", + "it needs a separate `tf.data.Dataset` per class. You could use `Dataset.filter`\n", + "to create those two datasets, but that results in all the data being loaded twice.\n", "\n", - "The `data.experimental.rejection_resample` function can be applied to a dataset to rebalance it, while only loading it once. Elements will be dropped from the dataset to achieve balance.\n", + "The `tf.data.Dataset.rejection_resample` method can be applied to a dataset to rebalance it, while only loading it once. 
Elements will be dropped or repeated to achieve balance.\n", "\n", - "`data.experimental.rejection_resample` takes a `class_func` argument. This `class_func` is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.\n", + "The `rejection_resample` method takes a `class_func` argument. This `class_func` is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.\n", "\n", - "The elements of `creditcard_ds` are already `(features, label)` pairs. So the `class_func` just needs to return those labels:" + "The goal here is to balance the label distribution, and the elements of `creditcard_ds` are already `(features, label)` pairs. So the `class_func` just needs to return those labels:" ] }, { @@ -2646,34 +2646,15 @@ " return label" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "DdKmE8Jumlp0" - }, - "source": [ - "The resampler also needs a target distribution, and optionally an initial distribution estimate:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9tv0tWNxmkzM" - }, - "outputs": [], - "source": [ - "resampler = tf.data.experimental.rejection_resample(\n", - " class_func, target_dist=[0.5, 0.5], initial_dist=fractions)" - ] - }, { "cell_type": "markdown", "metadata": { "id": "YxJrOZVToGuE" }, "source": [ - "The resampler deals with individual examples, so you must `unbatch` the dataset before applying the resampler:" + "The resampling method deals with individual examples, so in this case you must `unbatch` the dataset before applying that method.\n", + "\n", + "The method needs a target distribution, and optionally an initial distribution estimate as inputs." ] }, { @@ -2684,7 +2665,12 @@ }, "outputs": [], "source": [ - "resample_ds = creditcard_ds.unbatch().apply(resampler).batch(10)" + "resample_ds = (\n", + " creditcard_ds\n", + " .unbatch()\n", + " .rejection_resample(class_func, target_dist=[0.5,0.5],\n", + " initial_dist=fractions)\n", + " .batch(10))" ] }, { @@ -2693,7 +2679,7 @@ "id": "L-HnC1s8idqV" }, "source": [ - "The resampler returns creates `(class, example)` pairs from the output of the `class_func`. In this case, the `example` was already a `(feature, label)` pair, so use `map` to drop the extra copy of the labels:" + "The `rejection_resample` method returns `(class, example)` pairs where the `class` is the output of the `class_func`. In this case, the `example` was already a `(feature, label)` pair, so use `map` to drop the extra copy of the labels:" ] }, { @@ -2713,7 +2699,7 @@ "id": "j3d2jyEhx9kD" }, "source": [ - "Now the dataset produces examples of each class with 50/50 probability:" + "Now the dataset produces examples of each class with a 50/50 probability:" ] }, { @@ -2743,7 +2729,7 @@ "id": "SOGg1UFhUE4z" }, "source": [ - "Tensorflow supports [taking checkpoints](https://www.tensorflow.org/guide/checkpoint) so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. Note however that iterator checkpoints may be large, since transformations such as `shuffle` and `prefetch` require buffering elements within the iterator. 
\n", + "Tensorflow supports [taking checkpoints](./checkpoint.ipynb) so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. Note however that iterator checkpoints may be large, since transformations such as `Dataset.shuffle` and `Dataset.prefetch` require buffering elements within the iterator.\n", "\n", "To include your iterator in a checkpoint, pass the iterator to the `tf.train.Checkpoint` constructor." ] @@ -2779,7 +2765,7 @@ "id": "gxWglTwX9Fex" }, "source": [ - "Note: It is not possible to checkpoint an iterator which relies on external state such as a `tf.py_function`. Attempting to do so will raise an exception complaining about the external state." + "Note: It is not possible to checkpoint an iterator which relies on an external state, such as a `tf.py_function`. Attempting to do so will raise an exception complaining about the external state." ] }, { @@ -2788,7 +2774,7 @@ "id": "uLRdedPpbDdD" }, "source": [ - "## Using tf.data with tf.keras" + "## Using `tf.data` with `tf.keras`" ] }, { @@ -2798,7 +2784,7 @@ }, "source": [ "The `tf.keras` API simplifies many aspects of creating and executing machine\n", - "learning models. Its `.fit()` and `.evaluate()` and `.predict()` APIs support datasets as inputs. Here is a quick dataset and model setup:" + "learning models. Its `Model.fit` and `Model.evaluate` and `Model.predict` APIs support datasets as inputs. Here is a quick dataset and model setup:" ] }, { @@ -2833,7 +2819,7 @@ "])\n", "\n", "model.compile(optimizer='adam',\n", - " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", " metrics=['accuracy'])" ] }, @@ -2863,7 +2849,7 @@ "id": "FzpAQfJMJF41" }, "source": [ - "If you pass an infinite dataset, for example by calling `Dataset.repeat()`, you just need to also pass the `steps_per_epoch` argument:" + "If you pass an infinite dataset, for example by calling `Dataset.repeat`, you just need to also pass the `steps_per_epoch` argument:" ] }, { @@ -2927,7 +2913,7 @@ "id": "aZYhJ_YSIl6w" }, "source": [ - "The labels are not required in when calling `Model.predict`. " + "The labels are not required when calling `Model.predict`." 
] }, { @@ -2967,8 +2953,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "data.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/data_performance.ipynb b/site/en/guide/data_performance.ipynb index 78427505020..81d8b3fd5b3 100644 --- a/site/en/guide/data_performance.ipynb +++ b/site/en/guide/data_performance.ipynb @@ -274,6 +274,8 @@ "source": [ "### Prefetching\n", "\n", + "\n", + "\n", "Prefetching overlaps the preprocessing and model execution of a training step.\n", "While the model is executing training step `s`, the input pipeline is reading the data for step `s+1`.\n", "Doing so reduces the step time to the maximum (as opposed to the sum) of the training and the time it takes to extract the data.\n", @@ -321,6 +323,8 @@ "source": [ "### Parallelizing data extraction\n", "\n", + "\n", + "\n", "In a real-world setting, the input data may be stored remotely (for example, on Google Cloud Storage or HDFS).\n", "A dataset pipeline that works well when reading data locally might become bottlenecked on I/O when reading data remotely because of the following differences between local and remote storage:\n", "\n", @@ -420,6 +424,8 @@ "source": [ "### Parallelizing data transformation\n", "\n", + "\n", + "\n", "When preparing data, input elements may need to be pre-processed.\n", "To this end, the `tf.data` API offers the `tf.data.Dataset.map` transformation, which applies a user-defined function to each element of the input dataset.\n", "Because input elements are independent of one another, the pre-processing can be parallelized across multiple CPU cores.\n", @@ -527,6 +533,8 @@ "source": [ "### Caching\n", "\n", + "\n", + "\n", "The `tf.data.Dataset.cache` transformation can cache a dataset, either in memory or on local storage.\n", "This will save some operations (like file opening and data reading) from being executed during each epoch." ] @@ -572,6 +580,8 @@ "source": [ "### Vectorizing mapping\n", "\n", + "\n", + "\n", "Invoking a user-defined function passed into the `map` transformation has overhead related to scheduling and executing the user-defined function.\n", "Vectorize the user-defined function (that is, have it operate over a batch of inputs at once) and apply the `batch` transformation _before_ the `map` transformation.\n", "\n", @@ -687,6 +697,8 @@ "source": [ "### Reducing memory footprint\n", "\n", + "\n", + "\n", "A number of transformations, including `interleave`, `prefetch`, and `shuffle`, maintain an internal buffer of elements. If the user-defined function passed into the `map` transformation changes the size of the elements, then the ordering of the map transformation and the transformations that buffer elements affects the memory usage. 
In general, choose the order that results in lower memory footprint, unless different ordering is desirable for performance.\n", "\n", "#### Caching partial computations\n", @@ -713,12 +725,12 @@ "Here is a summary of the best practices for designing performant TensorFlow\n", "input pipelines:\n", "\n", - "* [Use the `prefetch` transformation](#Pipelining) to overlap the work of a producer and consumer\n", - "* [Parallelize the data reading transformation](#Parallelizing-data-extraction) using the `interleave` transformation\n", - "* [Parallelize the `map` transformation](#Parallelizing-data-transformation) by setting the `num_parallel_calls` argument\n", - "* [Use the `cache` transformation](#Caching) to cache data in memory during the first epoch\n", - "* [Vectorize user-defined functions](#Map-and-batch) passed in to the `map` transformation\n", - "* [Reduce memory usage](#Reducing-memory-footprint) when applying the `interleave`, `prefetch`, and `shuffle` transformations" + "* [Use the `prefetch` transformation](#prefetching) to overlap the work of a producer and consumer\n", + "* [Parallelize the data reading transformation](#parallelizing_data_extraction) using the `interleave` transformation\n", + "* [Parallelize the `map` transformation](#parallelizing_data_transformation) by setting the `num_parallel_calls` argument\n", + "* [Use the `cache` transformation](#caching) to cache data in memory during the first epoch\n", + "* [Vectorize user-defined functions](#vectorizing_mapping) passed in to the `map` transformation\n", + "* [Reduce memory usage](#reducing_memory_footprint) when applying the `interleave`, `prefetch`, and `shuffle` transformations" ] }, { @@ -1153,7 +1165,6 @@ "colab": { "collapsed_sections": [], "name": "data_performance.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/distributed_training.ipynb b/site/en/guide/distributed_training.ipynb index 6a0c72f3207..04b7118b1f2 100644 --- a/site/en/guide/distributed_training.ipynb +++ b/site/en/guide/distributed_training.ipynb @@ -78,7 +78,7 @@ "* Provide good performance out of the box.\n", "* Easy switching between strategies.\n", "\n", - "You can distribute training using `tf.distribute.Strategy` with a high-level API like Keras `Model.fit`, as well as [custom training loops](keras/writing_a_training_loop_from_scratch.ipynb) (and, in general, any computation using TensorFlow).\n", + "You can distribute training using `tf.distribute.Strategy` with a high-level API like Keras `Model.fit`, as well as [custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) (and, in general, any computation using TensorFlow).\n", "\n", "In TensorFlow 2.x, you can execute your programs eagerly, or in a graph using [`tf.function`](function.ipynb). `tf.distribute.Strategy` intends to support both these modes of execution, but works best with `tf.function`. Eager mode is only recommended for debugging purposes and not supported for `tf.distribute.TPUStrategy`. 
Although training is the focus of this guide, this API can also be used for distributing evaluation and prediction on different platforms.\n", "\n", @@ -130,7 +130,7 @@ "| **Custom training loop** | Supported | Supported | Supported | Experimental support | Experimental support |\n", "| **Estimator API** | Limited Support | Not supported | Limited Support | Limited Support | Limited Support |\n", "\n", - "Note: [Experimental support](https://www.tensorflow.org/guide/versions#what_is_not_covered) means the APIs are not covered by any compatibilities guarantees.\n", + "Note: [Experimental support](https://www.tensorflow.org/guide/versions#what_is_not_covered) means the APIs are not covered by any compatibility guarantees.\n", "\n", "Warning: Estimator support is limited. Basic training and evaluation are experimental, and advanced features—such as scaffold—are not implemented. You should be using Keras or custom training loops if a use case is not covered. Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. Go to the [migration guide](https://tensorflow.org/guide/migrate) for details." ] @@ -421,7 +421,7 @@ "source": [ "This strategy serves two main purposes:\n", "\n", - "* It allows writing distribution-aware library code unconditionally. For example, in `tf.optimizer`s you can use `tf.distribute.get_strategy` and use that strategy for reducing gradients—it will always return a strategy object on which you can call the `Strategy.reduce` API.\n" + "* It allows writing distribution-aware library code unconditionally. For example, in `tf.keras.optimizers` you can use `tf.distribute.get_strategy` and use that strategy for reducing gradients—it will always return a strategy object on which you can call the `Strategy.reduce` API.\n" ] }, { @@ -503,14 +503,14 @@ "source": [ "## Use tf.distribute.Strategy with Keras Model.fit\n", "\n", - "`tf.distribute.Strategy` is integrated into `tf.keras`, which is TensorFlow's implementation of the [Keras API specification](https://keras.io). `tf.keras` is a high-level API to build and train models. By integrating into the `tf.keras` backend, it's seamless for you to distribute your training written in the Keras training framework [using Model.fit](/keras/customizing_what_happens_in_fit.ipynb).\n", + "`tf.distribute.Strategy` is integrated into `tf.keras`, which is TensorFlow's implementation of the [Keras API specification](https://keras.io/api/). `tf.keras` is a high-level API to build and train models. By integrating into the `tf.keras` backend, it's seamless for you to distribute your training written in the Keras training framework [using Model.fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit).\n", "\n", "Here's what you need to change in your code:\n", "\n", "1. Create an instance of the appropriate `tf.distribute.Strategy`.\n", - "2. Move the creation of Keras model, optimizer and metrics inside `strategy.scope`.\n", + "2. Move the creation of Keras model, optimizer and metrics inside `strategy.scope`. 
Thus the code in the model's `call()`, `train_step()`, and `test_step()` methods will all be distributed and executed on the accelerator(s).\n", "\n", - "TensorFlow distribution strategies support all types of Keras models—[Sequential](/keras/sequential_model.ipynb), [Functional](/keras/functional.ipynb), and [subclassed](/keras/custom_layers_and_models.ipynb).\n", + "TensorFlow distribution strategies support all types of Keras models—[Sequential](https://www.tensorflow.org/guide/keras/sequential_model), [Functional](https://www.tensorflow.org/guide/keras/functional), and [subclassed](https://www.tensorflow.org/guide/keras/custom_layers_and_models)\n", "\n", "Here is a snippet of code to do this for a very simple Keras model with one `Dense` layer:" ] @@ -526,9 +526,10 @@ "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", "with mirrored_strategy.scope():\n", - " model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])\n", - "\n", - "model.compile(loss='mse', optimizer='sgd')" + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(1, input_shape=(1,),\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4))])\n", + " model.compile(loss='mse', optimizer='sgd')" ] }, { @@ -585,6 +586,17 @@ "In both cases—with `Dataset` or NumPy—each batch of the given input is divided equally among the multiple replicas. For instance, if you are using the `MirroredStrategy` with 2 GPUs, each batch of size 10 will be divided among the 2 GPUs, with each receiving 5 input examples in each step. Each epoch will then train faster as you add more GPUs. Typically, you would want to increase your batch size as you add more accelerators, so as to make effective use of the extra computing power. You will also need to re-tune your learning rate, depending on the model. You can use `strategy.num_replicas_in_sync` to get the number of replicas." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8ZmJqErtS4A1" + }, + "outputs": [], + "source": [ + "mirrored_strategy.num_replicas_in_sync" + ] + }, { "cell_type": "code", "execution_count": null, @@ -600,7 +612,7 @@ "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100)\n", "dataset = dataset.batch(global_batch_size)\n", "\n", - "LEARNING_RATES_BY_BATCH_SIZE = {5: 0.1, 10: 0.15}\n", + "LEARNING_RATES_BY_BATCH_SIZE = {5: 0.1, 10: 0.15, 20:0.175}\n", "learning_rate = LEARNING_RATES_BY_BATCH_SIZE[global_batch_size]" ] }, @@ -636,7 +648,7 @@ "source": [ "## Use tf.distribute.Strategy with custom training loops\n", "\n", - "As demonstrated above, using `tf.distribute.Strategy` with Keras `Model.fit` requires changing only a couple lines of your code. With a little more effort, you can also use `tf.distribute.Strategy` [with custom training loops](/keras/writing_a_training_loop_from_scratch.ipynb).\n", + "As demonstrated above, using `tf.distribute.Strategy` with Keras `Model.fit` requires changing only a couple lines of your code. With a little more effort, you can also use `tf.distribute.Strategy` [with custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).\n", "\n", "If you need more flexibility and control over your training loops than is possible with Estimator or Keras, you can write custom training loops. For instance, when using a GAN, you may want to take a different number of generator or discriminator steps each round. 
Similarly, the high level frameworks are not very suitable for Reinforcement Learning training.\n", "\n", @@ -663,7 +675,9 @@ "outputs": [], "source": [ "with mirrored_strategy.scope():\n", - " model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(1, input_shape=(1,),\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4))])\n", " optimizer = tf.keras.optimizers.SGD()" ] }, @@ -684,7 +698,7 @@ }, "outputs": [], "source": [ - "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(1000).batch(\n", " global_batch_size)\n", "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)" ] @@ -706,20 +720,21 @@ }, "outputs": [], "source": [ + "# Sets `reduction=NONE` to leave it to tf.nn.compute_average_loss() below.\n", "loss_object = tf.keras.losses.BinaryCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)\n", "\n", - "def compute_loss(labels, predictions):\n", - " per_example_loss = loss_object(labels, predictions)\n", - " return tf.nn.compute_average_loss(per_example_loss, global_batch_size=global_batch_size)\n", - "\n", "def train_step(inputs):\n", " features, labels = inputs\n", "\n", " with tf.GradientTape() as tape:\n", " predictions = model(features, training=True)\n", - " loss = compute_loss(labels, predictions)\n", + " per_example_loss = loss_object(labels, predictions)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " gradients = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", @@ -740,9 +755,16 @@ "source": [ "A few other things to note in the code above:\n", "\n", - "1. You used `tf.nn.compute_average_loss` to compute the loss. `tf.nn.compute_average_loss` sums the per example loss and divides the sum by the `global_batch_size`. This is important because later after the gradients are calculated on each replica, they are aggregated across the replicas by **summing** them.\n", - "2. You also used the `tf.distribute.Strategy.reduce` API to aggregate the results returned by `tf.distribute.Strategy.run`. `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can `reduce` them to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n", - "3. When you call `apply_gradients` within a distribution strategy scope, its behavior is modified. Specifically, before applying gradients on each parallel instance during synchronous training, it performs a sum-over-all-replicas of the gradients.\n" + " 1. You used `tf.nn.compute_average_loss` to reduce the per-example prediction losses to a scalar. `tf.nn.compute_average_loss` sums the per example loss and divides the sum by the global batch size. This is important because later after the gradients are calculated on each replica, they are aggregated across the replicas by **summing** them.\n", + "\n", + " By default, the global batch size is taken to be `tf.get_strategy().num_replicas_in_sync * tf.shape(per_example_loss)[0]`. It can also be specified explicitly as a keyword argument `global_batch_size=`. 
Without short batches, the default is equivalent to `tf.nn.compute_average_loss(..., global_batch_size=global_batch_size)` with the `global_batch_size` defined above. (For more on short batches and how to avoid or handle them, see the [Custom Training tutorial](../tutorials/distribute/custom_training.ipynb).)\n", + "\n", + " 2. You used `tf.nn.scale_regularization_loss` to scale regularization losses registered with the `Model` object, if any, by `1/num_replicas_in_sync` as well. For those regularization losses that are input-dependent, it falls on the modeling code, not the custom training loop, to perform the averaging over the per-replica(!) batch size; that way the modeling code can remain agnostic of replication while the training loop remains agnostic of how regularization losses are computed.\n", + "\n", + " 3. When you call `apply_gradients` within a distribution strategy scope, its behavior is modified. Specifically, before applying gradients on each parallel instance during synchronous training, it performs a sum-over-all-replicas of the gradients.\n", + "\n", + " 4. You also used the `tf.distribute.Strategy.reduce` API to aggregate the results returned by `tf.distribute.Strategy.run` for reporting. `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can `reduce` them to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n", + "\n" ] }, { diff --git a/site/en/guide/dtensor_overview.ipynb b/site/en/guide/dtensor_overview.ipynb new file mode 100644 index 00000000000..1b55ee0283f --- /dev/null +++ b/site/en/guide/dtensor_overview.ipynb @@ -0,0 +1,1082 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "1ljvLya59ep5" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VcQIa1uG86Wh" + }, + "source": [ + "# DTensor concepts" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6dWNQEum9AfY" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MGZuakHVlVQf" + }, + "source": [ + "## Overview\n", + "\n", + "This colab introduces DTensor, an extension to TensorFlow for synchronous distributed computing.\n", + "\n", + "DTensor provides a global programming model that allows developers to compose applications that operate on Tensors globally while managing the distribution across devices internally. DTensor distributes the program and tensors according to the sharding directives through a procedure called *[Single program, multiple data (SPMD)](https://en.wikipedia.org/wiki/SPMD) expansion*.\n", + "\n", + "By decoupling the application from sharding directives, DTensor enables running the same application on a single device, multiple devices, or even multiple clients, while preserving its global semantics.\n", + "\n", + "This guide introduces DTensor concepts for distributed computing, and how DTensor integrates with TensorFlow. For a demo of using DTensor in model training, refer to the [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb) tutorial." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h7ZTDq7KngwA" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "Begin by importing TensorFlow, `dtensor`, and configure TensorFlow to use 6 virtual CPUs. Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q92lo0zjwej8" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.experimental import dtensor\n", + "\n", + "print('TensorFlow version:', tf.__version__)\n", + "\n", + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(6)\n", + "DEVICES = [f'CPU:{i}' for i in range(6)]\n", + "\n", + "tf.config.list_logical_devices('CPU')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O-lsrxUnlsCC" + }, + "source": [ + "## DTensor's model of distributed tensors\n", + "\n", + "DTensor introduces two concepts: `dtensor.Mesh` and `dtensor.Layout`. They are abstractions to model the sharding of tensors across topologically related devices.\n", + "\n", + "- `Mesh` defines the device list for computation.\n", + "- `Layout` defines how to shard the Tensor dimension on a `Mesh`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JjiHaH0ql9yo" + }, + "source": [ + "### Mesh\n", + "\n", + "`Mesh` represents a logical Cartisian topology of a set of devices. Each dimension of the Cartisian grid is called a **Mesh dimension**, and referred to with a name. Names of mesh dimension within the same `Mesh` must be unique.\n", + "\n", + "Names of mesh dimensions are referenced by `Layout` to describe the sharding behavior of a `tf.Tensor` along each of its axes. This is described in more detail later in the section on `Layout`.\n", + "\n", + "`Mesh` can be thought of as a multi-dimensional array of devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_J6cOieEbaUw" + }, + "source": [ + "In a 1 dimensional `Mesh`, all devices form a list in a single mesh dimension. 
The following example uses `dtensor.create_mesh` to create a mesh from 6 CPU devices along a mesh dimension `'x'` with a size of 6 devices:\n", + "\n", + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QLH5fgdBmA58" + }, + "outputs": [], + "source": [ + "mesh_1d = dtensor.create_mesh([('x', 6)], devices=DEVICES)\n", + "print(mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hSZwaUwnEgXB" + }, + "source": [ + "A `Mesh` can be multi-dimensional as well. In the following example, 6 CPU devices form a `3x2` mesh, where the `'x'` mesh dimension has a size of 3 devices, and the `'y'` mesh dimension has a size of 2 devices:\n", + "\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "op6TmKUQE-sZ" + }, + "outputs": [], + "source": [ + "mesh_2d = dtensor.create_mesh([('x', 3), ('y', 2)], devices=DEVICES)\n", + "print(mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "deAqdrDPFn2f" + }, + "source": [ + "### Layout\n", + "\n", + "**`Layout`** specifies how a tensor is distributed, or sharded, on a `Mesh`.\n", + "\n", + "Note: To avoid confusion between `Mesh` and `Layout`, the term *dimension* is always associated with `Mesh`, and the term *axis* with `Tensor` and `Layout` in this guide.\n", + "\n", + "The rank of `Layout` should be the same as the rank of the `Tensor` to which the `Layout` is applied. For each of the `Tensor`'s axes the `Layout` may specify a mesh dimension to shard the tensor across, or specify the axis as \"unsharded\".\n", + "The tensor is replicated across any mesh dimensions that it is not sharded across.\n", + "\n", + "The rank of a `Layout` and the number of dimensions of a `Mesh` do not need to match. The `unsharded` axes of a `Layout` do not need to be associated with a mesh dimension, and `unsharded` mesh dimensions do not need to be associated with a `Layout` axis.\n", + "\n", + "\"Diagram" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Px_bF1c-bQ7e" + }, + "source": [ + "Let's analyze a few examples of `Layout` for the `Mesh`es created in the previous section."
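Before working through those examples, here is a quick sketch of how a `Layout` object itself can be inspected (hedged: it assumes the `mesh_2d` object created above; the variable names are illustrative only):

```python
# Sketch: build and inspect a Layout on the 3x2 mesh created above (assumed name: mesh_2d).
example_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh_2d)
print(example_layout.rank)            # 2 -- must match the rank of the tensors it annotates
print(example_layout.sharding_specs)  # ['x', 'unsharded']
print(example_layout.mesh)            # the Mesh this layout shards over
```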
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fqzCNlWAbm-c" + }, + "source": [ + "On a 1-dimensional mesh such as `[(\"x\", 6)]` (`mesh_1d` in the previous section), `Layout([\"unsharded\", \"unsharded\"], mesh_1d)` is a layout for a rank-2 tensor replicated across 6 devices.\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-a3EnmZag6x1" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ywRJwuLDt2Qq" + }, + "source": [ + "Using the same tensor and mesh, the layout `Layout(['unsharded', 'x'])` would shard the second axis of the tensor across the 6 devices.\n", + "\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7BgqL0jUvV5a" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([dtensor.UNSHARDED, 'x'], mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DgciDNmK76l9" + }, + "source": [ + "Given a 2-dimensional 3x2 mesh such as `[(\"x\", 3), (\"y\", 2)]` (`mesh_2d` from the previous section), `Layout([\"y\", \"x\"], mesh_2d)` is a layout for a rank-2 `Tensor` whose first axis is sharded across mesh dimension `\"y\"`, and whose second axis is sharded across mesh dimension `\"x\"`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Eyp_qOSyvieo" + }, + "source": [ + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p8OrehEuhPbS" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout(['y', 'x'], mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Kyg0V3ehMNJ" + }, + "source": [ + "For the same `mesh_2d`, the layout `Layout([\"x\", dtensor.UNSHARDED], mesh_2d)` is a layout for a rank-2 `Tensor` that is replicated across `\"y\"`, and whose first axis is sharded on mesh dimension `x`.\n", + "\n", + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IkWe6mVl7uRb" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([\"x\", dtensor.UNSHARDED], mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TTalu6M-ISYb" + }, + "source": [ + "### Single-client and multi-client applications\n", + "\n", + "DTensor supports both single-client and multi-client applications. The colab Python kernel is an example of a single-client DTensor application, where there is a single Python process.\n", + "\n", + "In a multi-client DTensor application, multiple Python processes collectively perform as a coherent application. The Cartesian grid of a `Mesh` in a multi-client DTensor application can span across devices regardless of whether they are attached locally to the current client or attached remotely to another client. The set of all devices used by a `Mesh` is called the *global device list*.\n", + "\n", + "The creation of a `Mesh` in a multi-client DTensor application is a collective operation where the *global device list* is identical for all of the participating clients, and the creation of the `Mesh` serves as a global barrier.\n", + "\n", + "During `Mesh` creation, each client provides its *local device list* together with the expected *global device list*. DTensor validates that both lists are consistent. 
Please refer to the API documentation for `dtensor.create_mesh` and `dtensor.create_distributed_mesh`\n", + " for more information on multi-client mesh creation and the *global device list*.\n", + "\n", + "A single-client application can be thought of as a special case of a multi-client application, with 1 client. In a single-client application, the *global device list* is identical to the *local device list*.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P_F7DWkXkB4w" + }, + "source": [ + "## DTensor as a sharded tensor\n", + "\n", + "Now, start coding with `DTensor`. The helper function, `dtensor_from_array`, demonstrates creating DTensors from something that looks like a `tf.Tensor`. The function performs two steps:\n", + "\n", + " - Replicates the tensor to every device on the mesh.\n", + " - Shards the copy according to the layout requested in its arguments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s6aws-b8dN9L" + }, + "outputs": [], + "source": [ + "def dtensor_from_array(arr, layout, shape=None, dtype=None):\n", + " \"\"\"Create a DTensor from something that looks like an array or Tensor.\n", + "\n", + " This function is convenient for quickly doodling DTensors from a known,\n", + " unsharded data object in a single-client environment. This is not the\n", + " most efficient way of creating a DTensor, but it will do for this\n", + " tutorial.\n", + " \"\"\"\n", + " if shape is not None or dtype is not None:\n", + " arr = tf.constant(arr, shape=shape, dtype=dtype)\n", + "\n", + " # replicate the input to the mesh\n", + " a = dtensor.copy_to_mesh(arr,\n", + " layout=dtensor.Layout.replicated(layout.mesh, rank=layout.rank))\n", + " # shard the copy to the desired layout\n", + " return dtensor.relayout(a, layout=layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r3o6IysrlGMu" + }, + "source": [ + "### Anatomy of a DTensor\n", + "\n", + "A DTensor is a `tf.Tensor` object, but augmented with the `Layout` annotation that defines its sharding behavior. A DTensor consists of the following:\n", + "\n", + " - Global tensor meta-data, including the global shape and dtype of the tensor.\n", + " - A `Layout`, which defines the `Mesh` the `Tensor` belongs to, and how the `Tensor` is sharded onto the `Mesh`.\n", + " - A list of **component tensors**, one item per local device in the `Mesh`.\n", + "\n", + "With `dtensor_from_array`, you can create your first DTensor, `my_first_dtensor`, and examine its contents:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mQu_nScGUvYH" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED], mesh)\n", + "\n", + "my_first_dtensor = dtensor_from_array([0, 1], layout)\n", + "\n", + "# Examine the DTensor content\n", + "print(my_first_dtensor)\n", + "print(\"global shape:\", my_first_dtensor.shape)\n", + "print(\"dtype:\", my_first_dtensor.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r8LQy1nqmvFy" + }, + "source": [ + "#### Layout and `fetch_layout`\n", + "\n", + "The layout of a DTensor is not a regular attribute of `tf.Tensor`. 
Instead, DTensor provides a function, `dtensor.fetch_layout` to access the layout of a DTensor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dCSFyaAjmzGu" + }, + "outputs": [], + "source": [ + "print(dtensor.fetch_layout(my_first_dtensor))\n", + "assert layout == dtensor.fetch_layout(my_first_dtensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ed7i3l2lmatm" + }, + "source": [ + "#### Component tensors, `pack` and `unpack`\n", + "\n", + "A DTensor consists of a list of **component tensors**. The component tensor for a device in the `Mesh` is the `Tensor` object representing the piece of the global DTensor that is stored on this device.\n", + "\n", + "A DTensor can be unpacked into component tensors through `dtensor.unpack`. You can make use of `dtensor.unpack` to inspect the components of the DTensor, and confirm they are on all devices of the `Mesh`.\n", + "\n", + "Note that the positions of component tensors in the global view may overlap each other. For example, in the case of a fully replicated layout, all components are identical replicas of the global tensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BGbjqVAOnXMk" + }, + "outputs": [], + "source": [ + "for component_tensor in dtensor.unpack(my_first_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-tqIQM52k788" + }, + "source": [ + "As shown, `my_first_dtensor` is a tensor of `[0, 1]` replicated to all 6 devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6By3k-CGn3yv" + }, + "source": [ + "The inverse operation of `dtensor.unpack` is `dtensor.pack`. Component tensors can be packed back into a DTensor.\n", + "\n", + "The components must have the same rank and dtype, which will be the rank and dtype of the returned DTensor. However, there is no strict requirement on the device placement of component tensors as inputs of `dtensor.unpack`: the function will automatically copy the component tensors to their respective corresponding devices.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9lT-6qQwxOgf" + }, + "outputs": [], + "source": [ + "packed_dtensor = dtensor.pack(\n", + " [[0, 1], [0, 1], [0, 1],\n", + " [0, 1], [0, 1], [0, 1]],\n", + " layout=layout\n", + ")\n", + "print(packed_dtensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zvS3autrpK2U" + }, + "source": [ + "### Sharding a DTensor to a Mesh\n", + "\n", + "So far you've worked with the `my_first_dtensor`, which is a rank-1 DTensor fully replicated across a dim-1 `Mesh`.\n", + "\n", + "Next, create and inspect DTensors that are sharded across a dim-2 `Mesh`. 
The following example does this with a 3x2 `Mesh` on 6 CPU devices, where size of mesh dimension `'x'` is 3 devices, and size of mesh dimension`'y'` is 2 devices:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KWb9Ae0VJ-Rc" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ndSeQSFWKQk9" + }, + "source": [ + "#### Fully sharded rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "Create a 3x2 rank-2 DTensor, sharding its first axis along the `'x'` mesh dimension, and its second axis along the `'y'` mesh dimension.\n", + "\n", + "- Because the tensor shape equals to the mesh dimension along all of the sharded axes, each device receives a single element of the DTensor.\n", + "- The rank of the component tensor is always the same as the rank of the global shape. DTensor adopts this convention as a simple way to preserve information for locating the relation between a component tensor and the global DTensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ax_ZHouJp1MX" + }, + "outputs": [], + "source": [ + "fully_sharded_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout([\"x\", \"y\"], mesh))\n", + "\n", + "for raw_component in dtensor.unpack(fully_sharded_dtensor):\n", + " print(\"Device:\", raw_component.device, \",\", raw_component)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zhsLC-NgrC2p" + }, + "source": [ + "#### Fully replicated rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "For comparison, create a 3x2 rank-2 DTensor, fully replicated to the same dim-2 Mesh.\n", + "\n", + " - Because the DTensor is fully replicated, each device receives a full replica of the 3x2 DTensor.\n", + " - The rank of the component tensors are the same as the rank of the global shape -- this fact is trivial, because in this case, the shape of the component tensors are the same as the global shape anyway." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xmyC6H6Ec90P" + }, + "outputs": [], + "source": [ + "fully_replicated_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh))\n", + "# Or, layout=tensor.Layout.fully_replicated(mesh, rank=2)\n", + "\n", + "for component_tensor in dtensor.unpack(fully_replicated_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KWoyv_oHMzk1" + }, + "source": [ + "#### Hybrid rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "What about somewhere between fully sharded and fully replicated?\n", + "\n", + "DTensor allows a `Layout` to be a hybrid, sharded along some axes, but replicated along others.\n", + "\n", + "For example, you can shard the same 3x2 rank-2 DTensor in the following way:\n", + "\n", + " - 1st axis sharded along the `'x'` mesh dimension.\n", + " - 2nd axis replicated along the `'y'` mesh dimension.\n", + "\n", + "To achieve this sharding scheme, you just need to replace the sharding spec of the 2nd axis from `'y'` to `dtensor.UNSHARDED`, to indicate your intention of replicating along the 2nd axis. 
The layout object will look like `Layout(['x', dtensor.UNSHARDED], mesh)`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DygnbkQ1Lu42" + }, + "outputs": [], + "source": [ + "hybrid_sharded_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout(['x', dtensor.UNSHARDED], mesh))\n", + "\n", + "for component_tensor in dtensor.unpack(hybrid_sharded_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T7FtZ9kQRZgE" + }, + "source": [ + "You can inspect the component tensors of the created DTensor and verify they are indeed sharded according to your scheme. It may be helpful to illustrate the situation with a chart:\n", + "\n", + " \"A\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "auAkA38XjL-q" + }, + "source": [ + "#### Tensor.numpy() and sharded DTensor\n", + "\n", + "Be aware that calling the `.numpy()` method on a sharded DTensor raises an error. The rationale for erroring is to protect against unintended gathering of data from multiple computing devices to the host CPU device backing the returned NumPy array:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hNdwmnL0jAXS" + }, + "outputs": [], + "source": [ + "print(fully_replicated_dtensor.numpy())\n", + "\n", + "try:\n", + " fully_sharded_dtensor.numpy()\n", + "except tf.errors.UnimplementedError:\n", + " print(\"got an error as expected for fully_sharded_dtensor\")\n", + "\n", + "try:\n", + " hybrid_sharded_dtensor.numpy()\n", + "except tf.errors.UnimplementedError:\n", + " print(\"got an error as expected for hybrid_sharded_dtensor\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8WcMkiagPF_6" + }, + "source": [ + "## TensorFlow API on DTensor\n", + "\n", + "DTensor strives to be a drop-in replacement for tensor in your program. The TensorFlow Python API that consume `tf.Tensor`, such as the Ops library functions, `tf.function`, `tf.GradientTape`, also work with DTensor.\n", + "\n", + "To accomplish this, for each [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs), DTensor produces and executes an equivalent [SPMD](https://en.wikipedia.org/wiki/SPMD) graph in a procedure called *SPMD expansion*. A few critical steps in DTensor SPMD expansion are:\n", + "\n", + " - Propagating the sharding `Layout` of DTensor in the TensorFlow graph\n", + " - Rewriting TensorFlow Ops on the global DTensor with equivalent TensorFlow Ops on the component tensors, inserting collective and communication Ops when necessary\n", + " - Lowering backend neutral TensorFlow Ops to backend specific TensorFlow Ops.\n", + "\n", + "The final result is that **DTensor is a drop-in replacement for Tensor**.\n", + "\n", + "Note: DTensor is still an experimental API which means you will be exploring and pushing the boundaries and limits of the DTensor programming model.\n", + "\n", + "There are 2 ways of triggering DTensor execution:\n", + "\n", + " - DTensor as operands of a Python function, such as `tf.matmul(a, b)`, will run through DTensor if `a`, `b`, or both are DTensors.\n", + " - Requesting the result of a Python function to be a DTensor, such as `dtensor.call_with_layout(tf.ones, layout, shape=(3, 2))`, will run through DTensor because we requested the output of `tf.ones` to be sharded according to a `layout`." 
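A compact sketch of both triggers side by side (hedged: it reuses the `dtensor_from_array` helper and `DEVICES` list defined earlier in this guide; the variable names are illustrative only):

```python
# Sketch of the two ways DTensor execution is triggered.
mesh = dtensor.create_mesh([("x", 6)], devices=DEVICES)
layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)

# 1. DTensor operands: tf.matmul runs through DTensor because `a` and `b` are DTensors.
a = dtensor_from_array([[1., 2.], [3., 4.]], layout)
b = dtensor_from_array([[1., 0.], [0., 1.]], layout)
c = tf.matmul(a, b)

# 2. DTensor results: the output of tf.ones is produced directly with the requested layout.
ones = dtensor.call_with_layout(tf.ones, layout, shape=(2, 2))

print(dtensor.fetch_layout(c).sharding_specs)
print(dtensor.fetch_layout(ones).sharding_specs)
```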
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "urKzmqAoPssT" + }, + "source": [ + "### DTensor as operands\n", + "\n", + "Many TensorFlow API functions take `tf.Tensor` as their operands and return `tf.Tensor` as their results. For these functions, you can express the intention to run a function through DTensor by passing in DTensors as operands. This section uses `tf.matmul(a, b)` as an example." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7LO8ZT7iWVga" + }, + "source": [ + "#### Fully replicated input and output\n", + "\n", + "In this case, the DTensors are fully replicated. On each of the devices of the `Mesh`,\n", + " - the component tensor for operand `a` is `[[1, 2, 3], [4, 5, 6]]` (2x3)\n", + " - the component tensor for operand `b` is `[[6, 5], [4, 3], [2, 1]]` (3x2)\n", + " - the computation consists of a single `MatMul` of `(2x3, 3x2) -> 2x2`,\n", + " - the component tensor for result `c` is `[[20, 14], [56, 41]]` (2x2)\n", + "\n", + "Total number of floating point mul operations is `6 devices * 4 result * 3 mul = 72`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TiZf2J9JNd2D" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=layout)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=layout)\n", + "\n", + "c = tf.matmul(a, b) # runs 6 identical matmuls in parallel on 6 devices\n", + "\n", + "# `c` is a DTensor replicated on all devices (same as `a` and `b`)\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)\n", + "print(\"components:\")\n", + "for component_tensor in dtensor.unpack(c):\n", + " print(component_tensor.device, component_tensor.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QXtR9qgKWgWV" + }, + "source": [ + "#### Sharding operands along the contracted axis\n", + "\n", + "You can reduce the amount of computation per device by sharding the operands `a` and `b`. A popular sharding scheme for `tf.matmul` is to shard the operands along the axis of the contraction, which means sharding `a` along the second axis, and `b` along the first axis.\n", + "\n", + "The global matrix product sharded under this scheme can be performed efficiently by local matmuls that run concurrently, followed by a collective reduction to aggregate the local results. This is also the [canonical way](https://github.com/open-mpi/ompi/blob/ee87ec391f48512d3718fc7c8b13596403a09056/docs/man-openmpi/man3/MPI_Reduce.3.rst?plain=1#L265) of implementing a distributed matrix dot product.\n", + "\n", + "Total number of floating point mul operations is `6 devices * 4 result * 1 = 24`, a factor of 3 reduction compared to the fully replicated case (72) above. The factor of 3 is due to the sharding along the `x` mesh dimension with a size of `3` devices.\n", + "\n", + "The reduction of the number of operations run sequentially is the main mechanism with which synchronous model parallelism accelerates training."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EyVAUvMePbms" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "a_layout = dtensor.Layout([dtensor.UNSHARDED, 'x'], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=a_layout)\n", + "b_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=b_layout)\n", + "\n", + "c = tf.matmul(a, b)\n", + "# `c` is a DTensor replicated on all devices (same as `a` and `b`)\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhD8yYgJiCEh" + }, + "source": [ + "#### Additional sharding\n", + "\n", + "You can perform additional sharding on the inputs, and they are appropriately carried over to the results. For example, you can apply additional sharding of operand `a` along its first axis to the `'y'` mesh dimension. The additional sharding will be carried over to the first axis of the result `c`.\n", + "\n", + "Total number of floating point mul operations is `6 devices * 2 result * 1 = 12`, an additional factor of 2 reduction compared to the case (24) above. The factor of 2 is due to the sharding along the `y` mesh dimension with a size of `2` devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0PYqe0neiOpR" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "\n", + "a_layout = dtensor.Layout(['y', 'x'], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=a_layout)\n", + "b_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=b_layout)\n", + "\n", + "c = tf.matmul(a, b)\n", + "# The sharding of `a` on the first axis is carried to `c`\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)\n", + "print(\"components:\")\n", + "for component_tensor in dtensor.unpack(c):\n", + " print(component_tensor.device, component_tensor.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c-1NazCVmLWZ" + }, + "source": [ + "### DTensor as output\n", + "\n", + "What about Python functions that do not take operands, but return a Tensor result that can be sharded? Examples of such functions are:\n", + "\n", + " - `tf.ones`, `tf.zeros`, `tf.random.stateless_normal`\n", + "\n", + "For these Python functions, DTensor provides `dtensor.call_with_layout`, which eagerly executes a Python function with DTensor, and ensures that the returned Tensor is a DTensor with the requested `Layout`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J0jo_8NPtJiO" + }, + "outputs": [], + "source": [ + "help(dtensor.call_with_layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V-YdLvfytM7g" + }, + "source": [ + "The eagerly executed Python function usually only contains a single non-trivial TensorFlow Op.\n", + "\n", + "To use a Python function that emits multiple TensorFlow Ops with `dtensor.call_with_layout`, the function should be converted to a `tf.function`. Calling a `tf.function` is a single TensorFlow Op. When the `tf.function` is called, DTensor can perform layout propagation when it analyzes the computing graph of the `tf.function`, before any of the intermediate tensors are materialized."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DLrksgFjqRLS" + }, + "source": [ + "#### APIs that emit a single TensorFlow Op\n", + "\n", + "If a function emits a single TensorFlow Op, you can directly apply `dtensor.call_with_layout` to the function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G1CuKYSFtFeM" + }, + "outputs": [], + "source": [ + "help(tf.ones)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2m_EAwy-ozOh" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "ones = dtensor.call_with_layout(tf.ones, dtensor.Layout(['x', 'y'], mesh), shape=(6, 4))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bx-7Xo8Cpb8S" + }, + "source": [ + "#### APIs that emit multiple TensorFlow Ops\n", + "\n", + "If the API emits multiple TensorFlow Ops, convert the function into a single Op through `tf.function`. For example, `tf.random.stateless_normal`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H8BQSTRFtCih" + }, + "outputs": [], + "source": [ + "help(tf.random.stateless_normal)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TvP81eYopSPm" + }, + "outputs": [], + "source": [ + "ones = dtensor.call_with_layout(\n", + " tf.function(tf.random.stateless_normal),\n", + " dtensor.Layout(['x', 'y'], mesh),\n", + " shape=(6, 4),\n", + " seed=(1, 1))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qKoojp9ZyWzW" + }, + "source": [ + "Wrapping a Python function that emits a single TensorFlow Op with `tf.function` is allowed. The only caveat is paying the associated cost and complexity of creating a `tf.function` from a Python function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbAtKrSkpOaq" + }, + "outputs": [], + "source": [ + "ones = dtensor.call_with_layout(\n", + " tf.function(tf.ones),\n", + " dtensor.Layout(['x', 'y'], mesh),\n", + " shape=(6, 4))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D-m1816JP3CE" + }, + "source": [ + "### From `tf.Variable` to `dtensor.DVariable`\n", + "\n", + "In TensorFlow, `tf.Variable` is the holder for a mutable `Tensor` value.\n", + "With DTensor, the corresponding variable semantics is provided by `dtensor.DVariable`.\n", + "\n", + "A new type, `DVariable`, was introduced for DTensor variables because DVariables have the additional requirement that the layout cannot change from its initial value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "awRPuR26P0Sc" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n", + "\n", + "v = dtensor.DVariable(\n", + " initial_value=dtensor.call_with_layout(\n", + " tf.function(tf.random.stateless_normal),\n", + " layout=layout,\n", + " shape=tf.TensorShape([64, 32]),\n", + " seed=[1, 1],\n", + " dtype=tf.float32))\n", + "\n", + "print(v.handle)\n", + "assert layout == dtensor.fetch_layout(v)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pb9jn473prC_" + }, + "source": [ + "Other than the requirement on matching the `layout`, a `DVariable` behaves the same as a `tf.Variable`. 
For example, you can add a DVariable to a DTensor,\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "adxFw9wJpqQQ" + }, + "outputs": [], + "source": [ + "a = dtensor.call_with_layout(tf.ones, layout=layout, shape=(64, 32))\n", + "b = v + a # add DVariable and DTensor\n", + "print(b)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QxBdNHWSu-kV" + }, + "source": [ + "You can also assign a DTensor to a DVariable:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oYwfiyw5P94U" + }, + "outputs": [], + "source": [ + "v.assign(a) # assign a DTensor to a DVariable\n", + "print(a)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4fvSk_VUvGnj" + }, + "source": [ + "Attempting to mutate the layout of a `DVariable`, by assigning a DTensor with an incompatible layout produces an error:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3pckUugYP_r-" + }, + "outputs": [], + "source": [ + "# variable's layout is immutable.\n", + "another_mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "b = dtensor.call_with_layout(tf.ones,\n", + " layout=dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], another_mesh),\n", + " shape=(64, 32))\n", + "try:\n", + " v.assign(b)\n", + "except:\n", + " print(\"exception raised\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LadIcwRvR6f" + }, + "source": [ + "## What's next?\n", + "\n", + "In this colab, you learned about DTensor, an extension to TensorFlow for distributed computing. To try out these concepts in a tutorial, check out [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "dtensor_overview.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/eager.ipynb b/site/en/guide/eager.ipynb deleted file mode 100644 index 44e2e624d43..00000000000 --- a/site/en/guide/eager.ipynb +++ /dev/null @@ -1,1146 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "CCQY7jpBfMur" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "z6X9omPnfO_h" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2QQJJyDzqGRb" - }, - "source": [ - "# Eager execution\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B1xdylywqUSX" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EGjDcGxIqEfX" - }, - "source": [ - "TensorFlow's eager execution is an imperative programming environment that\n", - "evaluates operations immediately, without building graphs: operations return\n", - "concrete values instead of constructing a computational graph to run later. This\n", - "makes it easy to get started with TensorFlow and debug models, and it\n", - "reduces boilerplate as well. To follow along with this guide, run the code\n", - "samples below in an interactive `python` interpreter.\n", - "\n", - "Eager execution is a flexible machine learning platform for research and\n", - "experimentation, providing:\n", - "\n", - "* *An intuitive interface*—Structure your code naturally and use Python data\n", - " structures. Quickly iterate on small models and small data.\n", - "* *Easier debugging*—Call ops directly to inspect running models and test\n", - " changes. Use standard Python debugging tools for immediate error reporting.\n", - "* *Natural control flow*—Use Python control flow instead of graph control\n", - " flow, simplifying the specification of dynamic models.\n", - "\n", - "Eager execution supports most TensorFlow operations and GPU acceleration.\n", - "\n", - "Note: Some models may experience increased overhead with eager execution\n", - "enabled. Performance improvements are ongoing, but please\n", - "[file a bug](https://github.com/tensorflow/tensorflow/issues) if you find a\n", - "problem and share your benchmarks." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RBAeIwOMrYk8" - }, - "source": [ - "## Setup and basic usage" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ByNsp4VqqEfa" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import tensorflow as tf\n", - "\n", - "import cProfile" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "48P3-8q4qEfe" - }, - "source": [ - "In Tensorflow 2.0, eager execution is enabled by default." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7aFsD8csqEff" - }, - "outputs": [], - "source": [ - "tf.executing_eagerly()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x_G1zZT5qEfh" - }, - "source": [ - "Now you can run TensorFlow operations and the results will return immediately:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9gsI54pbqEfj" - }, - "outputs": [], - "source": [ - "x = [[2.]]\n", - "m = tf.matmul(x, x)\n", - "print(\"hello, {}\".format(m))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ajFn6qsdqEfl" - }, - "source": [ - "Enabling eager execution changes how TensorFlow operations behave—now they\n", - "immediately evaluate and return their values to Python. `tf.Tensor` objects\n", - "reference concrete values instead of symbolic handles to nodes in a computational\n", - "graph. Since there isn't a computational graph to build and run later in a\n", - "session, it's easy to inspect results using `print()` or a debugger. Evaluating,\n", - "printing, and checking tensor values does not break the flow for computing\n", - "gradients.\n", - "\n", - "Eager execution works nicely with [NumPy](http://www.numpy.org/). NumPy\n", - "operations accept `tf.Tensor` arguments. The TensorFlow\n", - "`tf.math` operations convert\n", - "Python objects and NumPy arrays to `tf.Tensor` objects. 
The\n", - "`tf.Tensor.numpy` method returns the object's value as a NumPy `ndarray`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sTO0_5TYqz1n" - }, - "outputs": [], - "source": [ - "a = tf.constant([[1, 2],\n", - " [3, 4]])\n", - "print(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Dp14YT8Gq4r1" - }, - "outputs": [], - "source": [ - "# Broadcasting support\n", - "b = tf.add(a, 1)\n", - "print(b)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "69p3waMfq8cQ" - }, - "outputs": [], - "source": [ - "# Operator overloading is supported\n", - "print(a * b)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ui025t1qqEfm" - }, - "outputs": [], - "source": [ - "# Use NumPy values\n", - "import numpy as np\n", - "\n", - "c = np.multiply(a, b)\n", - "print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Tq_aFRzWrCua" - }, - "outputs": [], - "source": [ - "# Obtain numpy value from a tensor:\n", - "print(a.numpy())\n", - "# => [[1 2]\n", - "# [3 4]]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H08f9ss9qEft" - }, - "source": [ - "## Dynamic control flow\n", - "\n", - "A major benefit of eager execution is that all the functionality of the host\n", - "language is available while your model is executing. So, for example,\n", - "it is easy to write [fizzbuzz](https://en.wikipedia.org/wiki/Fizz_buzz):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0fudRMeUqEfu" - }, - "outputs": [], - "source": [ - "def fizzbuzz(max_num):\n", - " counter = tf.constant(0)\n", - " max_num = tf.convert_to_tensor(max_num)\n", - " for num in range(1, max_num.numpy()+1):\n", - " num = tf.constant(num)\n", - " if int(num % 3) == 0 and int(num % 5) == 0:\n", - " print('FizzBuzz')\n", - " elif int(num % 3) == 0:\n", - " print('Fizz')\n", - " elif int(num % 5) == 0:\n", - " print('Buzz')\n", - " else:\n", - " print(num.numpy())\n", - " counter += 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P2cKknQWrJLB" - }, - "outputs": [], - "source": [ - "fizzbuzz(15)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7kA-aC3BqEfy" - }, - "source": [ - "This has conditionals that depend on tensor values and it prints these values\n", - "at runtime." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8huKpuuAwICq" - }, - "source": [ - "## Eager training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mp2lCCZYrxHd" - }, - "source": [ - "### Computing gradients\n", - "\n", - "[Automatic differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation)\n", - "is useful for implementing machine learning algorithms such as\n", - "[backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training\n", - "neural networks. During eager execution, use `tf.GradientTape` to trace\n", - "operations for computing gradients later.\n", - "\n", - "You can use `tf.GradientTape` to train and/or compute gradients in eager. It is especially useful for complicated training loops. \n", - "\n", - "Since different operations can occur during each call, all\n", - "forward-pass operations get recorded to a \"tape\". To compute the gradient, play\n", - "the tape backwards and then discard. 
A particular `tf.GradientTape` can only\n", - "compute one gradient; subsequent calls throw a runtime error." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7g1yWiSXqEf-" - }, - "outputs": [], - "source": [ - "w = tf.Variable([[1.0]])\n", - "with tf.GradientTape() as tape:\n", - " loss = w * w\n", - "\n", - "grad = tape.gradient(loss, w)\n", - "print(grad) # => tf.Tensor([[ 2.]], shape=(1, 1), dtype=float32)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vkHs32GqweYS" - }, - "source": [ - "### Train a model\n", - "\n", - "The following example creates a multi-layer model that classifies the standard\n", - "MNIST handwritten digits. It demonstrates the optimizer and layer APIs to build\n", - "trainable graphs in an eager execution environment." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "38kymXZowhhz" - }, - "outputs": [], - "source": [ - "# Fetch and format the mnist data\n", - "(mnist_images, mnist_labels), _ = tf.keras.datasets.mnist.load_data()\n", - "\n", - "dataset = tf.data.Dataset.from_tensor_slices(\n", - " (tf.cast(mnist_images[...,tf.newaxis]/255, tf.float32),\n", - " tf.cast(mnist_labels,tf.int64)))\n", - "dataset = dataset.shuffle(1000).batch(32)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rl1K8rOowmwT" - }, - "outputs": [], - "source": [ - "# Build the model\n", - "mnist_model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(16,[3,3], activation='relu',\n", - " input_shape=(None, None, 1)),\n", - " tf.keras.layers.Conv2D(16,[3,3], activation='relu'),\n", - " tf.keras.layers.GlobalAveragePooling2D(),\n", - " tf.keras.layers.Dense(10)\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fvyk-HgGwxwl" - }, - "source": [ - "Even without training, call the model and inspect the output in eager execution:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BsxystjBwxLS" - }, - "outputs": [], - "source": [ - "for images,labels in dataset.take(1):\n", - " print(\"Logits: \", mnist_model(images[0:1]).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y3PGa8G7qEgB" - }, - "source": [ - "While keras models have a builtin training loop (using the `fit` method), sometimes you need more customization. Here's an example, of a training loop implemented with eager:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bzRhM7JDnaEG" - }, - "outputs": [], - "source": [ - "optimizer = tf.keras.optimizers.Adam()\n", - "loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", - "loss_history = []" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tXaupYXRI2YM" - }, - "source": [ - "Note: Use the assert functions in `tf.debugging` to check if a condition holds up. This works in eager and graph execution." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DDHrigtiCIA4" - }, - "outputs": [], - "source": [ - "def train_step(images, labels):\n", - " with tf.GradientTape() as tape:\n", - " logits = mnist_model(images, training=True)\n", - " \n", - " # Add asserts to check the shape of the output.\n", - " tf.debugging.assert_equal(logits.shape, (32, 10))\n", - " \n", - " loss_value = loss_object(labels, logits)\n", - "\n", - " loss_history.append(loss_value.numpy().mean())\n", - " grads = tape.gradient(loss_value, mnist_model.trainable_variables)\n", - " optimizer.apply_gradients(zip(grads, mnist_model.trainable_variables))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0m1xAXrmqEgJ" - }, - "outputs": [], - "source": [ - "def train(epochs):\n", - " for epoch in range(epochs):\n", - " for (batch, (images, labels)) in enumerate(dataset):\n", - " train_step(images, labels)\n", - " print ('Epoch {} finished'.format(epoch))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "C5dGz0p_nf4W" - }, - "outputs": [], - "source": [ - "train(epochs = 3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5vG5ql_2vYB5" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "plt.plot(loss_history)\n", - "plt.xlabel('Batch #')\n", - "plt.ylabel('Loss [entropy]')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kKpOlHPLqEgl" - }, - "source": [ - "### Variables and optimizers\n", - "\n", - "`tf.Variable` objects store mutable `tf.Tensor`-like values accessed during\n", - "training to make automatic differentiation easier. \n", - "\n", - "The collections of variables can be encapsulated into layers or models, along with methods that operate on them. See [Custom Keras layers and models](./keras/custom_layers_and_models.ipynb) for details. The main difference between layers and models is that models add methods like `Model.fit`, `Model.evaluate`, and `Model.save`.\n", - "\n", - "For example, the automatic differentiation example above\n", - "can be rewritten:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2qXcPngYk8dN" - }, - "outputs": [], - "source": [ - "class Linear(tf.keras.Model):\n", - " def __init__(self):\n", - " super(Linear, self).__init__()\n", - " self.W = tf.Variable(5., name='weight')\n", - " self.B = tf.Variable(10., name='bias')\n", - " def call(self, inputs):\n", - " return inputs * self.W + self.B" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nnQLBYmEqEgm" - }, - "outputs": [], - "source": [ - "# A toy dataset of points around 3 * x + 2\n", - "NUM_EXAMPLES = 2000\n", - "training_inputs = tf.random.normal([NUM_EXAMPLES])\n", - "noise = tf.random.normal([NUM_EXAMPLES])\n", - "training_outputs = training_inputs * 3 + 2 + noise\n", - "\n", - "# The loss function to be optimized\n", - "def loss(model, inputs, targets):\n", - " error = model(inputs) - targets\n", - " return tf.reduce_mean(tf.square(error))\n", - "\n", - "def grad(model, inputs, targets):\n", - " with tf.GradientTape() as tape:\n", - " loss_value = loss(model, inputs, targets)\n", - " return tape.gradient(loss_value, [model.W, model.B])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q7x1CDurl3IG" - }, - "source": [ - "Next:\n", - "\n", - "1. Create the model.\n", - "2. 
The Derivatives of a loss function with respect to model parameters.\n", - "3. A strategy for updating the variables based on the derivatives." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SbXJk0f2lztg" - }, - "outputs": [], - "source": [ - "model = Linear()\n", - "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)\n", - "\n", - "print(\"Initial loss: {:.3f}\".format(loss(model, training_inputs, training_outputs)))\n", - "\n", - "steps = 300\n", - "for i in range(steps):\n", - " grads = grad(model, training_inputs, training_outputs)\n", - " optimizer.apply_gradients(zip(grads, [model.W, model.B]))\n", - " if i % 20 == 0:\n", - " print(\"Loss at step {:03d}: {:.3f}\".format(i, loss(model, training_inputs, training_outputs)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PV_dqer7pzSH" - }, - "outputs": [], - "source": [ - "print(\"Final loss: {:.3f}\".format(loss(model, training_inputs, training_outputs)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rvt_Wj3Tp0hm" - }, - "outputs": [], - "source": [ - "print(\"W = {}, B = {}\".format(model.W.numpy(), model.B.numpy()))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rPjb8nRWqEgr" - }, - "source": [ - "Note: Variables persist until the last reference to the python object\n", - "is removed, and is the variable is deleted." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "scMjg6L6qEgv" - }, - "source": [ - "### Object-based saving\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y-0ZcCcjwkux" - }, - "source": [ - "A `tf.keras.Model` includes a convenient `save_weights` method allowing you to easily create a checkpoint: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "oJrMX94PwD9s" - }, - "outputs": [], - "source": [ - "model.save_weights('weights')\n", - "status = model.load_weights('weights')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2EfTjWV_wEng" - }, - "source": [ - "Using `tf.train.Checkpoint` you can take full control over this process.\n", - "\n", - "This section is an abbreviated version of the [guide to training checkpoints](./checkpoint.ipynb).\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7z5xRfdHzZOQ" - }, - "outputs": [], - "source": [ - "x = tf.Variable(10.)\n", - "checkpoint = tf.train.Checkpoint(x=x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IffrUVG7zyVb" - }, - "outputs": [], - "source": [ - "x.assign(2.) # Assign a new value to the variables and save.\n", - "checkpoint_path = './ckpt/'\n", - "checkpoint.save(checkpoint_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eMT9koCoqEgw" - }, - "outputs": [], - "source": [ - "x.assign(11.) # Change the variable after saving.\n", - "\n", - "# Restore values from the checkpoint\n", - "checkpoint.restore(tf.train.latest_checkpoint(checkpoint_path))\n", - "\n", - "print(x) # => 2.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vbFnP-yLqEgx" - }, - "source": [ - "To save and load models, `tf.train.Checkpoint` stores the internal state of objects,\n", - "without requiring hidden variables. 
To record the state of a `model`,\n", - "an `optimizer`, and a global step, pass them to a `tf.train.Checkpoint`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hWZHyAXMqEg0" - }, - "outputs": [], - "source": [ - "model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(16,[3,3], activation='relu'),\n", - " tf.keras.layers.GlobalAveragePooling2D(),\n", - " tf.keras.layers.Dense(10)\n", - "])\n", - "optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n", - "checkpoint_dir = 'path/to/model_dir'\n", - "if not os.path.exists(checkpoint_dir):\n", - " os.makedirs(checkpoint_dir)\n", - "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt\")\n", - "root = tf.train.Checkpoint(optimizer=optimizer,\n", - " model=model)\n", - "\n", - "root.save(checkpoint_prefix)\n", - "root.restore(tf.train.latest_checkpoint(checkpoint_dir))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R-ITwkBCF6GJ" - }, - "source": [ - "Note: In many training loops, variables are created after `tf.train.Checkpoint.restore` is called. These variables will be restored as soon as they are created, and assertions are available to ensure that a checkpoint has been fully loaded. See the [guide to training checkpoints](./checkpoint.ipynb) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3yoD0VJ7qEg3" - }, - "source": [ - "### Object-oriented metrics\n", - "\n", - "`tf.keras.metrics` are stored as objects. Update a metric by passing the new data to\n", - "the callable, and retrieve the result using the `tf.keras.metrics.result` method,\n", - "for example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9ccu0iAaqEg5" - }, - "outputs": [], - "source": [ - "m = tf.keras.metrics.Mean(\"loss\")\n", - "m(0)\n", - "m(5)\n", - "m.result() # => 2.5\n", - "m([8, 9])\n", - "m.result() # => 5.5" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aB8qWtT955pI" - }, - "source": [ - "### Summaries and TensorBoard\n", - "\n", - "[TensorBoard](https://tensorflow.org/tensorboard) is a visualization tool for\n", - "understanding, debugging and optimizing the model training process. It uses\n", - "summary events that are written while executing the program.\n", - "\n", - "You can use `tf.summary` to record summaries of variable in eager execution.\n", - "For example, to record summaries of `loss` once every 100 training steps:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "z6VInqhA6RH4" - }, - "outputs": [], - "source": [ - "logdir = \"./tb/\"\n", - "writer = tf.summary.create_file_writer(logdir)\n", - "\n", - "steps = 1000\n", - "with writer.as_default(): # or call writer.set_as_default() before the loop.\n", - " for i in range(steps):\n", - " step = i + 1\n", - " # Calculate loss with your real train function.\n", - " loss = 1 - 0.001 * step\n", - " if step % 100 == 0:\n", - " tf.summary.scalar('loss', loss, step=step)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "08QQD2j36TaI" - }, - "outputs": [], - "source": [ - "!ls tb/" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xEL4yJe5qEhD" - }, - "source": [ - "## Advanced automatic differentiation topics\n", - "\n", - "### Dynamic models\n", - "\n", - "`tf.GradientTape` can also be used in dynamic models. 
This example for a\n", - "[backtracking line search](https://wikipedia.org/wiki/Backtracking_line_search)\n", - "algorithm looks like normal NumPy code, except there are gradients and is\n", - "differentiable, despite the complex control flow:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L518n5dkqEhE" - }, - "outputs": [], - "source": [ - "def line_search_step(fn, init_x, rate=1.0):\n", - " with tf.GradientTape() as tape:\n", - " # Variables are automatically tracked.\n", - " # But to calculate a gradient from a tensor, you must `watch` it.\n", - " tape.watch(init_x)\n", - " value = fn(init_x)\n", - " grad = tape.gradient(value, init_x)\n", - " grad_norm = tf.reduce_sum(grad * grad)\n", - " init_value = value\n", - " while value > init_value - rate * grad_norm:\n", - " x = init_x - rate * grad\n", - " value = fn(x)\n", - " rate /= 2.0\n", - " return x, value" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gieGOf_DqEhK" - }, - "source": [ - "### Custom gradients\n", - "\n", - "Custom gradients are an easy way to override gradients. Within the forward function, define the gradient with respect to the\n", - "inputs, outputs, or intermediate results. For example, here's an easy way to clip\n", - "the norm of the gradients in the backward pass:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-OwwsWUAqEhK" - }, - "outputs": [], - "source": [ - "@tf.custom_gradient\n", - "def clip_gradient_by_norm(x, norm):\n", - " y = tf.identity(x)\n", - " def grad_fn(dresult):\n", - " return [tf.clip_by_norm(dresult, norm), None]\n", - " return y, grad_fn" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JPLDHkF_qEhN" - }, - "source": [ - "Custom gradients are commonly used to provide a numerically stable gradient for a\n", - "sequence of operations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "24WiLROnqEhO" - }, - "outputs": [], - "source": [ - "def log1pexp(x):\n", - " return tf.math.log(1 + tf.exp(x))\n", - "\n", - "def grad_log1pexp(x):\n", - " with tf.GradientTape() as tape:\n", - " tape.watch(x)\n", - " value = log1pexp(x)\n", - " return tape.gradient(value, x)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n8fq69r9-B-c" - }, - "outputs": [], - "source": [ - "# The gradient computation works fine at x = 0.\n", - "grad_log1pexp(tf.constant(0.)).numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_VFSU0mG-FSp" - }, - "outputs": [], - "source": [ - "# However, x = 100 fails because of numerical instability.\n", - "grad_log1pexp(tf.constant(100.)).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-VcTR34rqEhQ" - }, - "source": [ - "Here, the `log1pexp` function can be analytically simplified with a custom\n", - "gradient. 
The implementation below reuses the value for `tf.exp(x)` that is\n", - "computed during the forward pass—making it more efficient by eliminating\n", - "redundant calculations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Q7nvfx_-qEhS" - }, - "outputs": [], - "source": [ - "@tf.custom_gradient\n", - "def log1pexp(x):\n", - " e = tf.exp(x)\n", - " def grad(dy):\n", - " return dy * (1 - 1 / (1 + e))\n", - " return tf.math.log(1 + e), grad\n", - "\n", - "def grad_log1pexp(x):\n", - " with tf.GradientTape() as tape:\n", - " tape.watch(x)\n", - " value = log1pexp(x)\n", - " return tape.gradient(value, x)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5gHPKMfl-Kge" - }, - "outputs": [], - "source": [ - "# As before, the gradient computation works fine at x = 0.\n", - "grad_log1pexp(tf.constant(0.)).numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "u38MOfz3-MDE" - }, - "outputs": [], - "source": [ - "# And the gradient computation also works at x = 100.\n", - "grad_log1pexp(tf.constant(100.)).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rnZXjfQzqEhV" - }, - "source": [ - "## Performance\n", - "\n", - "Computation is automatically offloaded to GPUs during eager execution. If you\n", - "want control over where a computation runs you can enclose it in a\n", - "`tf.device('/gpu:0')` block (or the CPU equivalent):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ac9Y64H-qEhX" - }, - "outputs": [], - "source": [ - "import time\n", - "\n", - "def measure(x, steps):\n", - " # TensorFlow initializes a GPU the first time it's used, exclude from timing.\n", - " tf.matmul(x, x)\n", - " start = time.time()\n", - " for i in range(steps):\n", - " x = tf.matmul(x, x)\n", - " # tf.matmul can return before completing the matrix multiplication\n", - " # (e.g., can return after enqueing the operation on a CUDA stream).\n", - " # The x.numpy() call below will ensure that all enqueued operations\n", - " # have completed (and will also copy the result to host memory,\n", - " # so we're including a little more than just the matmul operation\n", - " # time).\n", - " _ = x.numpy()\n", - " end = time.time()\n", - " return end - start\n", - "\n", - "shape = (1000, 1000)\n", - "steps = 200\n", - "print(\"Time to multiply a {} matrix by itself {} times:\".format(shape, steps))\n", - "\n", - "# Run on CPU:\n", - "with tf.device(\"/cpu:0\"):\n", - " print(\"CPU: {} secs\".format(measure(tf.random.normal(shape), steps)))\n", - "\n", - "# Run on GPU, if available:\n", - "if tf.config.list_physical_devices(\"GPU\"):\n", - " with tf.device(\"/gpu:0\"):\n", - " print(\"GPU: {} secs\".format(measure(tf.random.normal(shape), steps)))\n", - "else:\n", - " print(\"GPU: not found\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RLw3IS7UqEhe" - }, - "source": [ - "A `tf.Tensor` object can be copied to a different device to execute its\n", - "operations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ny6LX2BVqEhf" - }, - "outputs": [], - "source": [ - "if tf.config.list_physical_devices(\"GPU\"):\n", - " x = tf.random.normal([10, 10])\n", - "\n", - " x_gpu0 = x.gpu()\n", - " x_cpu = x.cpu()\n", - "\n", - " _ = tf.matmul(x_cpu, x_cpu) # Runs on CPU\n", - " _ = tf.matmul(x_gpu0, x_gpu0) # Runs on GPU:0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oA_qaII3-p6c" 
- }, - "source": [ - "### Benchmarks\n", - "\n", - "For compute-heavy models, such as\n", - "[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/benchmarks/resnet50)\n", - "training on a GPU, eager execution performance is comparable to `tf.function` execution.\n", - "But this gap grows larger for models with less computation and there is work to\n", - "be done for optimizing hot code paths for models with lots of small operations.\n", - "\n", - "## Work with functions\n", - "\n", - "While eager execution makes development and debugging more interactive,\n", - "TensorFlow 1.x style graph execution has advantages for distributed training, performance\n", - "optimizations, and production deployment. To bridge this gap, TensorFlow 2.0 introduces `function`s via the `tf.function` API. For more information, see the [tf.function](./function.ipynb) guide." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "eager.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/guide/effective_tf2.ipynb b/site/en/guide/effective_tf2.ipynb index 59f8fab1e3d..f4204c0971f 100644 --- a/site/en/guide/effective_tf2.ipynb +++ b/site/en/guide/effective_tf2.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "This guide provides a list of best practices for writing code using TensorFlow 2 (TF2). Refer to the [migrate section of the guide](https://tensorflow.org/guide/migrate) for more info on migrating your TF1.x code to TF2." + "This guide provides a list of best practices for writing code using TensorFlow 2 (TF2), it is written for users who have recently switched over from TensorFlow 1 (TF1). Refer to the [migrate section of the guide](https://tensorflow.org/guide/migrate) for more info on migrating your TF1 code to TF2." ] }, { @@ -751,8 +751,7 @@ "\n", "* When using `tf.keras.Model.compile`, set `run_eagerly = True` to disable the `Model` logic from being wrapped in a `tf.function`.\n", "\n", - "* Use `tf.data.experimental.enable_debug_mode` to enable the debug mode for `tf.data`. Read the [API docs](https://www.tensorflow.org/api_docs/python/tf/data/experimental/enable_debug_mode) for more details.\n", - "\n" + "* Use `tf.data.experimental.enable_debug_mode` to enable the debug mode for `tf.data`. Read the [API docs](https://www.tensorflow.org/api_docs/python/tf/data/experimental/enable_debug_mode) for more details.\n" ] }, { @@ -786,7 +785,6 @@ "colab": { "collapsed_sections": [], "name": "effective_tf2.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/estimator.ipynb b/site/en/guide/estimator.ipynb index e0ae0a3792f..05e8fb4012a 100644 --- a/site/en/guide/estimator.ipynb +++ b/site/en/guide/estimator.ipynb @@ -68,7 +68,7 @@ "id": "rILQuAiiRlI7" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. 
Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://www.tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { @@ -869,7 +869,6 @@ "A_lvUsSLZzVg" ], "name": "estimator.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/extension_type.ipynb b/site/en/guide/extension_type.ipynb new file mode 100644 index 00000000000..7e8edeea7c9 --- /dev/null +++ b/site/en/guide/extension_type.ipynb @@ -0,0 +1,2130 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "WrcIOXsUQh8U" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tXAbWHtqs1Y2" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HTgMAvQq-PU_" + }, + "source": [ + "# Extension types\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jHcw9MtgBo7e" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0MsE_F0WBpmc" + }, + "outputs": [], + "source": [ + "!pip install -q tf_nightly\n", + "import tensorflow as tf\n", + "import numpy as np\n", + "from typing import Tuple, List, Mapping, Union, Optional\n", + "import tempfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1BAk3bji_0wl" + }, + "source": [ + "## Extension types\n", + "\n", + "User-defined types can make projects more readable, modular, maintainable. However, most TensorFlow APIs have very limited support for user-defined Python types. This includes both high-level APIs (such as [Keras](https://www.tensorflow.org/guide/keras/overview), [tf.function](https://www.tensorflow.org/guide/function), [`tf.SavedModel`](https://www.tensorflow.org/guide/saved_model)) and lower-level APIs (such as `tf.while_loop` and `tf.concat`). TensorFlow **extension types** can be used to create user-defined object-oriented types that work seamlessly with TensorFlow's APIs. To create an extension type, simply define a Python class with `tf.experimental.ExtensionType` as its base, and use [type annotations](https://www.python.org/dev/peps/pep-0484/) to specify the type for each field." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7o5KY7L5_nxy" + }, + "outputs": [], + "source": [ + "class TensorGraph(tf.experimental.ExtensionType):\n", + " \"\"\"A collection of labeled nodes connected by weighted edges.\"\"\"\n", + " edge_weights: tf.Tensor # shape=[num_nodes, num_nodes]\n", + " node_labels: Mapping[str, tf.Tensor] # shape=[num_nodes]; dtype=any\n", + "\n", + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " \"\"\"A tensor paired with a boolean mask, indicating which values are valid.\"\"\"\n", + " values: tf.Tensor\n", + " mask: tf.Tensor # shape=values.shape; false for missing/invalid values.\n", + "\n", + "class CSRSparseMatrix(tf.experimental.ExtensionType):\n", + " \"\"\"Compressed sparse row matrix (https://en.wikipedia.org/wiki/Sparse_matrix).\"\"\"\n", + " values: tf.Tensor # shape=[num_nonzero]; dtype=any\n", + " col_index: tf.Tensor # shape=[num_nonzero]; dtype=int64\n", + " row_index: tf.Tensor # shape=[num_rows+1]; dtype=int64" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FiaNXPa7pNK-" + }, + "source": [ + "The `tf.experimental.ExtensionType` base class works similarly to [`typing.NamedTuple`](https://docs.python.org/3/library/typing.html#typing.NamedTuple) and [`@dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass) from the standard Python library. In particular, it automatically adds a constructor and special methods (such as `__repr__` and `__eq__`) based on the field type annotations." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JsE7X6_uMyLo" + }, + "source": [ + "Typically, extension types tend to fall into one of two categories:\n", + "\n", + "* ***Data structures***, which group together a collection of related values, and can provide useful operations based on those values. 
Data structures may be fairly general (such as the `TensorGraph` example above); or they may be highly customized to a specific model.\n", + "\n", + "* ***Tensor-like types***, which specialize or extend the concept of \"Tensor.\" Types in this category have a `rank`, a `shape`, and usually a `dtype`; and it makes sense to use them with Tensor operations (such as `tf.stack`, `tf.add`, or `tf.matmul`). `MaskedTensor` and `CSRSparseMatrix` are examples of tensor-like types." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uxngcajlMqIY" + }, + "source": [ + "## Supported APIs\n", + "\n", + "Extension types are supported by the following TensorFlow APIs:\n", + "\n", + "* **Keras**: Extension types can be used as inputs and outputs for Keras `Models` and `Layers`.\n", + "* **`tf.data.Dataset`**: Extension types can be included in `Datasets`, and returned by dataset `Iterators`.\n", + "* **TensorFlow Hub**: Extension types can be used as inputs and outputs for `tf.hub` modules.\n", + "* **SavedModel**: Extension types can be used as inputs and outputs for `SavedModel` functions.\n", + "* **`tf.function`**: Extension types can be used as arguments and return values for functions wrapped with the `@tf.function` decorator.\n", + "* **While loops**: Extension types can be used as loop variables in `tf.while_loop`, and can be used as arguments and return values for the while-loop's body.\n", + "* **Conditionals**: Extension types can be conditionally selected using `tf.cond` and `tf.case`.\n", + "* **`tf.py_function`**: Extension types can be used as arguments and return values for the `func` argument to `tf.py_function`.\n", + "* **Tensor ops**: Extension types can be extended to support most TensorFlow ops that accept Tensor inputs (such as `tf.matmul`, `tf.gather`, and `tf.reduce_sum`). Go to the \"*Dispatch*\" section below for more information.\n", + "* **Distribution strategy**: Extension types can be used as per-replica values.\n", + "\n", + "For more details, see the section on \"TensorFlow APIs that support ExtensionTypes\" below.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VIpZwuPVpwOX" + }, + "source": [ + "## Requirements\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nNk_TQeJGVwV" + }, + "source": [ + "### Field types\n", + "\n", + "All fields—instance variables—must be declared, and a type annotation must be provided for each field. 
The following type annotations are supported:\n", + "\n", + "Type | Example\n", + "---- | -------\n", + "Python integers | `i: int`\n", + "Python floats | `f: float`\n", + "Python strings | `s: str`\n", + "Python booleans | `b: bool`\n", + "Python `None` | `n: None`\n", + "[Tensor shapes](https://www.tensorflow.org/api_docs/python/tf/TensorShape) | `shape: tf.TensorShape`\n", + "[Tensor `dtype`s](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) | `dtype: tf.DType`\n", + "[Tensors](https://www.tensorflow.org/api_docs/python/tf/Tensor) | `t: tf.Tensor`\n", + "[Extension types](https://www.tensorflow.org/api_docs/python/tf/experimental/ExtensionType) | `mt: MyMaskedTensor`\n", + "[Ragged tensors](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor) | `rt: tf.RaggedTensor`\n", + "[Sparse tensors](https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor) | `st: tf.SparseTensor`\n", + "[Indexed slices](https://www.tensorflow.org/api_docs/python/tf/IndexedSlices) | `s: tf.IndexedSlices`\n", + "[Optional tensors](https://www.tensorflow.org/api_docs/python/tf/experimental/Optional) | `o: tf.experimental.Optional`\n", + "[Type unions](https://docs.python.org/3/library/typing.html#typing.Union) | `int_or_float: typing.Union[int, float]`\n", + "[Tuples](https://docs.python.org/3/library/typing.html#typing.Tuple) | `params: typing.Tuple[int, float, tf.Tensor, int]`\n", + "[Var-length tuples](https://docs.python.org/3/library/typing.html#typing.Tuple) | `lengths: typing.Tuple[int, ...]`\n", + "[Mappings](https://docs.python.org/3/library/typing.html#typing.Mapping) | `tags: typing.Mapping[str, tf.Tensor]`\n", + "[Optional values](https://docs.python.org/3/library/typing.html#typing.Optional) | `weight: typing.Optional[tf.Tensor]`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iFetYyZsIvf6" + }, + "source": [ + "### Mutability\n", + "\n", + "Extension types are required to be immutable. This ensures that they can be properly tracked by TensorFlow's graph-tracing mechanisms.\n", + "If you find yourself wanting to mutate an extension type value, consider instead defining methods that transform values. For example, rather than defining a `set_mask` method to mutate a `MaskedTensor`, you could define a `replace_mask` method that returns a new `MaskedTensor`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DThZLYH2IwFh" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def replace_mask(self, new_mask):\n", + " self.values.shape.assert_is_compatible_with(new_mask.shape)\n", + " return MaskedTensor(self.values, new_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x3JyivI_qAtt" + }, + "source": [ + "## Functionality added by `ExtensionType`\n", + "\n", + "The `ExtensionType` base class provides the following functionality:\n", + "\n", + "* A constructor (`__init__`).\n", + "* A printable representation method (`__repr__`).\n", + "* Equality and inequality operators (`__eq__`).\n", + "* A validation method (`__validate__`).\n", + "* Enforced immutability.\n", + "* A nested `TypeSpec`.\n", + "* Tensor API dispatch support.\n", + "\n", + "Go to the \"Customizing `ExtensionType`s\" section below for more information on customizing this functionality." 
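As a consolidated illustration, the sketch below does not appear in the original guide: the `PaddedBatch` type is hypothetical, and it simply combines a few of the field annotations from the table above with the automatically added constructor, `__repr__`, `__eq__`, and enforced immutability, before being passed through `tf.function`, one of the supported APIs listed earlier.

```python
import typing
import tensorflow as tf

class PaddedBatch(tf.experimental.ExtensionType):
  """A hypothetical batch of rows padded to a common length."""
  values: tf.Tensor                        # Tensor field (converted via tf.convert_to_tensor)
  lengths: tf.Tensor                       # Tensor field
  pad_value: typing.Union[int, float] = 0  # type union with a default value

# Auto-generated constructor and printable representation.
batch = PaddedBatch(values=[[1, 2, 0], [3, 0, 0]], lengths=[2, 1])
print(batch)

# Auto-generated equality operator (may return a scalar boolean Tensor,
# since the fields contain Tensors).
print(batch == PaddedBatch(values=[[1, 2, 0], [3, 0, 0]], lengths=[2, 1]))

# Fields are immutable once constructed.
try:
  batch.pad_value = -1
except AttributeError as e:
  print(f"Got expected AttributeError: {e}")

# Extension types can be used with supported APIs such as `tf.function`.
@tf.function
def total_length(b: PaddedBatch):
  return tf.reduce_sum(b.lengths)

print(total_length(batch))
```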
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pfSYs6P26gKq" + }, + "source": [ + "### Constructor\n", + "The constructor added by `ExtensionType` takes each field as a named argument (in the order they were listed in the class definition). This constructor will type-check each parameter, and convert them where necessary. In particular, `Tensor` fields are converted using `tf.convert_to_tensor`; `Tuple` fields are converted to `tuple`s; and `Mapping` fields are converted to immutable dicts." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DiXwyZ5M5KFW" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + "# Constructor takes one parameter for each field.\n", + "mt = MaskedTensor(values=[[1, 2, 3], [4, 5, 6]],\n", + " mask=[[True, True, False], [True, False, True]])\n", + "\n", + "# Fields are type-checked and converted to the declared types.\n", + "# For example, `mt.values` is converted to a Tensor.\n", + "print(mt.values)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ezNDe1cYF0Qb" + }, + "source": [ + "The constructor raises an `TypeError` if a field value can not be converted to its declared type:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6HnrMaabF5VS" + }, + "outputs": [], + "source": [ + "try:\n", + " MaskedTensor([1, 2, 3], None)\n", + "except TypeError as e:\n", + " print(f\"Got expected TypeError: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FwQUI3X02s20" + }, + "source": [ + "The default value for a field can be specified by setting its value at the class level:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GbzDT9fz20JA" + }, + "outputs": [], + "source": [ + "class Pencil(tf.experimental.ExtensionType):\n", + " color: str = \"black\"\n", + " has_erasor: bool = True\n", + " length: tf.Tensor = 1.0\n", + "\n", + "Pencil()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nOW7lS9P4Foc" + }, + "outputs": [], + "source": [ + "Pencil(length=0.5, color=\"blue\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S5Eivtg07Aau" + }, + "source": [ + "### Printable representation\n", + "\n", + "`ExtensionType` adds a default printable representation method (`__repr__`) that includes the class name and the value for each field:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5SyiKTe55krG" + }, + "outputs": [], + "source": [ + "print(MaskedTensor(values=[1, 2, 3], mask=[True, True, False]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q4l_gnQh6nXR" + }, + "source": [ + "### Equality operators\n", + "\n", + "`ExtensionType` adds default equality operators (`__eq__` and `__ne__`) that consider two values equal if they have the same type and all their fields are equal. Tensor fields are considered equal if they have the same shape and are elementwise equal for all elements." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bHdLg13V52Xm" + }, + "outputs": [], + "source": [ + "a = MaskedTensor([1, 2], [True, False])\n", + "b = MaskedTensor([[3, 4], [5, 6]], [[False, True], [True, True]])\n", + "print(f\"a == a: {a==a}\")\n", + "print(f\"a == b: {a==b}\")\n", + "print(f\"a == a.values: {a==a.values}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O3HqsO3jZlQq" + }, + "source": [ + "**Note:** if any field contains a `Tensor`, then `__eq__` may return a scalar boolean `Tensor` (rather than a Python boolean value)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hCpBfkKqCuip" + }, + "source": [ + "### Validation method\n", + "\n", + "`ExtensionType` adds a `__validate__` method, which can be overridden to perform validation checks on fields. It is run after the constructor is called, and after fields have been type-checked and converted to their declared types, so it can assume that all fields have their declared types.\n", + "\n", + "The following example updates `MaskedTensor` to validate the `shape`s and `dtype`s of its fields:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dgZOJRINDn00" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " \"\"\"A tensor paired with a boolean mask, indicating which values are valid.\"\"\"\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + " def __validate__(self):\n", + " self.values.shape.assert_is_compatible_with(self.mask.shape)\n", + " assert self.mask.dtype.is_bool, 'mask.dtype must be bool'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ajSgkGUUn9WL" + }, + "outputs": [], + "source": [ + "try:\n", + " MaskedTensor([1, 2, 3], [0, 1, 0]) # Wrong `dtype` for mask.\n", + "except AssertionError as e:\n", + " print(f\"Got expected AssertionError: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Fhb96luJn9K7" + }, + "outputs": [], + "source": [ + "try:\n", + " MaskedTensor([1, 2, 3], [True, False]) # shapes don't match.\n", + "except ValueError as e:\n", + " print(f\"Got expected ValueError: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pjIPAF1OCAdO" + }, + "source": [ + "### Enforced immutability\n", + "\n", + "`ExtensionType` overrides the `__setattr__` and `__delattr__` methods to prevent mutation, ensuring that extension type values are immutable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NgmJ1C7ilN5C" + }, + "outputs": [], + "source": [ + "mt = MaskedTensor([1, 2, 3], [True, False, True])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cMYmJr3RoFKp" + }, + "outputs": [], + "source": [ + "try:\n", + " mt.mask = [True, True, True]\n", + "except AttributeError as e:\n", + " print(f\"Got expected AttributeError: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZWwA-zWdzqlU" + }, + "outputs": [], + "source": [ + "try:\n", + " mt.mask[0] = False\n", + "except TypeError as e:\n", + " print(f\"Got expected TypeError: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PN_txJVKoFoF" + }, + "outputs": [], + "source": [ + "try:\n", + " del mt.mask\n", + "except AttributeError as e:\n", + " print(f\"Got expected AttributeError: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FBVFtCYn69Ou" + }, + "source": [ + "### Nested TypeSpec\n", + "\n", + "Each `ExtensionType` class has a corresponding `TypeSpec` class, which is created automatically and stored as `.Spec`.\n", + "\n", + "This class captures all the information from a value *except* for the values of any nested tensors. In particular, the `TypeSpec` for a value is created by replacing any nested Tensor, ExtensionType, or CompositeTensor with its `TypeSpec`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GRjANkGYKGnV" + }, + "outputs": [], + "source": [ + "class Player(tf.experimental.ExtensionType):\n", + " name: tf.Tensor\n", + " attributes: Mapping[str, tf.Tensor]\n", + "\n", + "anne = Player(\"Anne\", {\"height\": 8.3, \"speed\": 28.1})\n", + "anne_spec = tf.type_spec_from_value(anne)\n", + "print(anne_spec.name) # Records `dtype` and `shape`, but not the string value.\n", + "print(anne_spec.attributes) # Records keys and TensorSpecs for values." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I2fkgckxO564" + }, + "source": [ + "`TypeSpec` values can be constructed explicitly, or they can be built from an `ExtensionType` value using `tf.type_spec_from_value`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1ehAa7d9OGai" + }, + "outputs": [], + "source": [ + "spec1 = Player.Spec(name=tf.TensorSpec([], tf.float32), attributes={})\n", + "spec2 = tf.type_spec_from_value(anne)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owcFG3cAMCwA" + }, + "source": [ + "`TypeSpec`s are used by TensorFlow to divide values into a **static component** and a **dynamic component**:\n", + "\n", + "* The **static component** (which is fixed at graph-construction time) is encoded with a `tf.TypeSpec`.\n", + "* The **dynamic component** (which can vary each time the graph is run) is encoded as a list of `tf.Tensor`s.\n", + "\n", + "For example, `tf.function` retraces its wrapped function whenever an argument has a previously unseen `TypeSpec`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pg-m5YLRM1Nd" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def anonymize_player(player):\n", + " print(\"<>\")\n", + " return Player(\"\", player.attributes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0CCGm7cpeIq-" + }, + "outputs": [], + "source": [ + "# Function gets traced (first time the function has been called):\n", + "anonymize_player(Player(\"Anne\", {\"height\": 8.3, \"speed\": 28.1}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WB7bt7s83mFE" + }, + "outputs": [], + "source": [ + "# Function does NOT get traced (same TypeSpec: just tensor values changed)\n", + "anonymize_player(Player(\"Bart\", {\"height\": 8.1, \"speed\": 25.3}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dNm7vLpR3nMH" + }, + "outputs": [], + "source": [ + "# Function gets traced (new TypeSpec: keys for attributes changed):\n", + "anonymize_player(Player(\"Chuck\", {\"height\": 11.0, \"jump\": 5.3}))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U5rN1HPq25xC" + }, + "source": [ + "For more information, see the [tf.function Guide](https://www.tensorflow.org/guide/function#rules_of_tracing)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gX613uRk0qLz" + }, + "source": [ + "## Customizing `ExtensionType`s\n", + "\n", + "In addition to simply declaring fields and their types, extension types may:\n", + "\n", + "* Override the default printable representation (`__repr__`).\n", + "* Define methods.\n", + "* Define `classmethod`s and `staticmethod`s.\n", + "* Define properties.\n", + "* Override the default constructor (`__init__`).\n", + "* Override the default equality operator (`__eq__`).\n", + "* Define operators (such as `__add__` and `__lt__`).\n", + "* Declare default values for fields.\n", + "* Define subclasses.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MK-ePVDj-ROE" + }, + "source": [ + "### Overriding the default printable representation\n", + "\n", + "You can override this default string conversion operator for extension types. The following example updates the `MaskedTensor` class to generate a more readable string representation when values are printed in Eager mode." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gdPhjYEr8IGO" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " \"\"\"A tensor paired with a boolean mask, indicating which values are valid.\"\"\"\n", + " values: tf.Tensor\n", + " mask: tf.Tensor # shape=values.shape; false for invalid values.\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + "def masked_tensor_str(values, mask):\n", + " if isinstance(values, tf.Tensor):\n", + " if hasattr(values, 'numpy') and hasattr(mask, 'numpy'):\n", + " return f''\n", + " else:\n", + " return f'MaskedTensor(values={values}, mask={mask})'\n", + " if len(values.shape) == 1:\n", + " items = [repr(v) if m else '_' for (v, m) in zip(values, mask)]\n", + " else:\n", + " items = [masked_tensor_str(v, m) for (v, m) in zip(values, mask)]\n", + " return '[%s]' % ', '.join(items)\n", + "\n", + "mt = MaskedTensor(values=[[1, 2, 3], [4, 5, 6]],\n", + " mask=[[True, True, False], [True, False, True]])\n", + "print(mt)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_MLQU2_v8VjG" + }, + "source": [ + "### Defining methods\n", + "\n", + "Extension types may define methods, just like any normal Python class. For example, the `MaskedTensor` type could define a `with_default` method that returns a copy of `self` with masked values replaced by a given `default` value. Methods may optionally be annotated with the `@tf.function` decorator." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7RR-tqee8ZdP" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def with_default(self, default):\n", + " return tf.where(self.mask, self.values, default)\n", + "\n", + "MaskedTensor([1, 2, 3], [True, False, True]).with_default(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qwd_gGKp9RP0" + }, + "source": [ + "### Defining `classmethod`s and `staticmethod`s\n", + "\n", + "Extension types may define methods using the `@classmethod` and `@staticmethod` decorators. For example, the `MaskedTensor` type could define a factory method that masks any element with a given value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BacCEJYU9sBR" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " @staticmethod\n", + " def from_tensor_and_value_to_mask(values, value_to_mask):\n", + " return MaskedTensor(values, values != value_to_mask)\n", + "\n", + "x = tf.constant([[1, 0, 2], [3, 0, 0]])\n", + "MaskedTensor.from_tensor_and_value_to_mask(x, 0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xIPf9PZX9AwL" + }, + "source": [ + "### Defining properties\n", + "Extension types may define properties using the `@property` decorator, just like any normal Python class. 
For example, the `MaskedTensor` type could define a `dtype` property that's a shorthand for the `dtype` of the values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "16E68wZ-9KXp" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " @property\n", + " def dtype(self):\n", + " return self.values.dtype\n", + "\n", + "MaskedTensor([1, 2, 3], [True, False, True]).dtype" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mm5gxoG57nf3" + }, + "source": [ + "### Overriding the default constructor\n", + "\n", + "You can override the default constructor for extension types. Custom constructors must set a value for every declared field; and after the custom constructor returns, all fields will be type-checked, and values will be converted as described above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-8K3KeB08G1S" + }, + "outputs": [], + "source": [ + "class Toy(tf.experimental.ExtensionType):\n", + " name: str\n", + " price: tf.Tensor\n", + " def __init__(self, name, price, discount=0):\n", + " self.name = name\n", + " self.price = price * (1 - discount)\n", + "\n", + "print(Toy(\"ball\", 5.0, discount=0.2)) # On sale -- 20% off!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qyQxMlwLFQt7" + }, + "source": [ + "Alternatively, you might consider leaving the default constructor as-is, but adding one or more factory methods. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jiApK4hzFY89" + }, + "outputs": [], + "source": [ + "class Toy(tf.experimental.ExtensionType):\n", + " name: str\n", + " price: tf.Tensor\n", + "\n", + " @staticmethod\n", + " def new_toy_with_discount(name, price, discount):\n", + " return Toy(name, price * (1 - discount))\n", + "\n", + "print(Toy.new_toy_with_discount(\"ball\", 5.0, discount=0.2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pdVcRBhG-Uee" + }, + "source": [ + "### Overriding the default equality operator (`__eq__`)\n", + "\n", + "You can override the default `__eq__` operator for extension types. The following example updates `MaskedTensor` to ignore masked elements when comparing for equality." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dA7DyjfB-Yz0" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " def __eq__(self, other):\n", + " result = tf.math.equal(self.values, other.values)\n", + " result = result | ~(self.mask & other.mask)\n", + " return tf.reduce_all(result)\n", + "\n", + "x = MaskedTensor([1, 2, 3, 4], [True, True, False, True])\n", + "y = MaskedTensor([5, 2, 0, 4], [False, True, False, True])\n", + "print(x == y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n1mZ1Lkyi14B" + }, + "source": [ + "**Note:** You generally don't need to override `__ne__`, since its default implementation simply calls `__eq__` and negates the result." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A_Jib1SQD1-z" + }, + "source": [ + "### Using forward references\n", + "\n", + "If the type for a field has not been defined yet, you may use a string containing the name of the type instead. 
In the following example, the string `\"Node\"` is used to annotate the `children` field because the `Node` type hasn't been (fully) defined yet.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_Z029QKED0Ao" + }, + "outputs": [], + "source": [ + "class Node(tf.experimental.ExtensionType):\n", + " value: tf.Tensor\n", + " children: Tuple[\"Node\", ...] = ()\n", + "\n", + "Node(3, [Node(5), Node(2)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "boaNg1zHgoVn" + }, + "source": [ + "### Defining subclasses\n", + "\n", + "Extension types may be subclassed using the standard Python syntax. Extension type subclasses may add new fields, methods, and properties; and may override the constructor, the printable representation, and the equality operator. The following example defines a basic `TensorGraph` class that uses three `Tensor` fields to encode a set of edges between nodes. It then defines a subclass that adds a `Tensor` field to record a \"feature value\" for each node. The subclass also defines a method to propagate the feature values along the edges." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "58r6qRiK-uZh" + }, + "outputs": [], + "source": [ + "class TensorGraph(tf.experimental.ExtensionType):\n", + " num_nodes: tf.Tensor\n", + " edge_src: tf.Tensor # edge_src[e] = index of src node for edge e.\n", + " edge_dst: tf.Tensor # edge_dst[e] = index of dst node for edge e.\n", + "\n", + "class TensorGraphWithNodeFeature(TensorGraph):\n", + " node_features: tf.Tensor # node_features[n] = feature value for node n.\n", + "\n", + " def propagate_features(self, weight=1.0) -> 'TensorGraphWithNodeFeature':\n", + " updates = tf.gather(self.node_features, self.edge_src) * weight\n", + " new_node_features = tf.tensor_scatter_nd_add(\n", + " self.node_features, tf.expand_dims(self.edge_dst, 1), updates)\n", + " return TensorGraphWithNodeFeature(\n", + " self.num_nodes, self.edge_src, self.edge_dst, new_node_features)\n", + "\n", + "g = TensorGraphWithNodeFeature( # Edges: 0->1, 4->3, 2->2, 2->1\n", + " num_nodes=5, edge_src=[0, 4, 2, 2], edge_dst=[1, 3, 2, 1],\n", + " node_features=[10.0, 0.0, 2.0, 5.0, -1.0, 0.0])\n", + "\n", + "print(\"Original features:\", g.node_features)\n", + "print(\"After propagating:\", g.propagate_features().node_features)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U_oElT5HzqSG" + }, + "source": [ + "### Defining private fields\n", + "\n", + "An extension type's fields may be marked private by prefixing them with an underscore (following standard Python conventions). This does not impact the way that TensorFlow treats the fields in any way; but simply serves as a signal to any users of the extension type that those fields are private.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oMdH7ORqh8Pl" + }, + "source": [ + "### Customizing the `ExtensionType`'s `TypeSpec`\n", + "\n", + "Each `ExtensionType` class has a corresponding `TypeSpec` class, which is created automatically and stored as `.Spec`. For more information, see the section \"Nested TypeSpec\" above.\n", + "\n", + "To customize the `TypeSpec`, simply define your own nested class named `Spec`, and `ExtensionType` will use that as the basis for the automatically constructed `TypeSpec`. 
You can customize the `Spec` class by:\n", + "\n", + "* Overriding the default printable representation.\n", + "* Overriding the default constructor.\n", + "* Defining methods, `classmethod`s, `staticmethod`s, and properties.\n", + "\n", + "The following example customizes the `MaskedTensor.Spec` class to make it easier to use:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Gm4RaqbkLlNG" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.ExtensionType):\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " def with_values(self, new_values):\n", + " return MaskedTensor(new_values, self.mask)\n", + "\n", + " class Spec:\n", + " def __init__(self, shape, dtype=tf.float32):\n", + " self.values = tf.TensorSpec(shape, dtype)\n", + " self.mask = tf.TensorSpec(shape, tf.bool)\n", + "\n", + " def __repr__(self):\n", + " return f\"MaskedTensor.Spec(shape={self.shape}, dtype={self.dtype})\"\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s3zzUXPSNF72" + }, + "source": [ + "**Note**: The custom `Spec` class may not use any instance variables that were not declared in the original `ExtensionType`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rip4GCuYPL7o" + }, + "source": [ + "## Tensor API dispatch\n", + "\n", + "Extension types can be \"tensor-like\", in the sense that they specialize or extend the interface defined by the `tf.Tensor` type. Examples of tensor-like extension types include `RaggedTensor`, `SparseTensor`, and `MaskedTensor`. ***Dispatch decorators*** can be used to override the default behavior of TensorFlow operations when applied to tensor-like extension types. TensorFlow currently defines three dispatch decorators:\n", + "\n", + "* `@tf.experimental.dispatch_for_api(tf_api)`\n", + "* `@tf.experimental.dispatch_for_unary_elementwise_apis(x_type)`\n", + "* `@tf.experimental.dispatch_for_binary_elementwise_apis(x_type, y_type)`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5BTQHcY4gHwZ" + }, + "source": [ + "### Dispatch for a single API\n", + "\n", + "The `tf.experimental.dispatch_for_api` decorator overrides the default behavior of a specified TensorFlow operation when it is called with the specified signature. 
For example, you can use this decorator to specify how `tf.stack` should process `MaskedTensor` values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B4QgO_fUW2o2" + }, + "outputs": [], + "source": [ + "@tf.experimental.dispatch_for_api(tf.stack)\n", + "def masked_stack(values: List[MaskedTensor], axis = 0):\n", + " return MaskedTensor(tf.stack([v.values for v in values], axis),\n", + " tf.stack([v.mask for v in values], axis))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FxKcKWNUaLvm" + }, + "source": [ + "This overrides the default implementation for `tf.stack` whenever it is called with a list of `MaskedTensor` values (since the `values` argument is annotated with `typing.List[MaskedTensor]`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RqpFjaAvaA19" + }, + "outputs": [], + "source": [ + "x = MaskedTensor([1, 2, 3], [True, True, False])\n", + "y = MaskedTensor([4, 5, 6], [False, True, True])\n", + "tf.stack([x, y])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "loGi8taCa265" + }, + "source": [ + "To allow `tf.stack` to handle lists of mixed `MaskedTensor` and `Tensor` values, you can refine the type annotation for the `values` parameter and update the body of the function appropriately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_xySkm0ganAI" + }, + "outputs": [], + "source": [ + "tf.experimental.unregister_dispatch_for(masked_stack)\n", + "\n", + "def convert_to_masked_tensor(x):\n", + " if isinstance(x, MaskedTensor):\n", + " return x\n", + " else:\n", + " return MaskedTensor(x, tf.ones_like(x, tf.bool))\n", + "\n", + "@tf.experimental.dispatch_for_api(tf.stack)\n", + "def masked_stack_v2(values: List[Union[MaskedTensor, tf.Tensor]], axis = 0):\n", + " values = [convert_to_masked_tensor(v) for v in values]\n", + " return MaskedTensor(tf.stack([v.values for v in values], axis),\n", + " tf.stack([v.mask for v in values], axis))\n", + "x = MaskedTensor([1, 2, 3], [True, True, False])\n", + "y = tf.constant([4, 5, 6])\n", + "tf.stack([x, y, x])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ITioFCyjQm8V" + }, + "source": [ + "For a list of APIs that can be overridden, see the API documentation for `tf.experimental.dispatch_for_api`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f91SaHSqc-jO" + }, + "source": [ + "### Dispatch for all unary elementwise APIs\n", + "\n", + "The `tf.experimental.dispatch_for_unary_elementwise_apis` decorator overrides the default behavior of ***all*** unary elementwise ops (such as `tf.math.cos`) whenever the value for the first argument (typically named `x`) matches the type annotation `x_type`. 
The decorated function should take two arguments:\n", + "\n", + "* `api_func`: A function that takes a single parameter and performs the elementwise operation (for example, `tf.abs`).\n", + "* `x`: The first argument to the elementwise operation.\n", + "\n", + "The following example updates all unary elementwise operations to handle the `MaskedTensor` type:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cv5fV4xxZI9q" + }, + "outputs": [], + "source": [ + " @tf.experimental.dispatch_for_unary_elementwise_apis(MaskedTensor)\n", + " def masked_tensor_unary_elementwise_api_handler(api_func, x):\n", + " return MaskedTensor(api_func(x.values), x.mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qiK4n6vaeFwo" + }, + "source": [ + "This function will now be used whenever a unary elementwise operation is called on a `MaskedTensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SkH0xi5gd_41" + }, + "outputs": [], + "source": [ + " x = MaskedTensor([1, -2, -3], [True, False, True])\n", + " print(tf.abs(x))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2Ej5fxLBfaXW" + }, + "outputs": [], + "source": [ + "print(tf.ones_like(x, dtype=tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z9OgLyfEejqc" + }, + "source": [ + "### Dispatch for all binary elementwise APIs\n", + "\n", + "Similarly, `tf.experimental.dispatch_for_binary_elementwise_apis` can be used to update all binary elementwise operations to handle the `MaskedTensor` type:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z8Du-GPofpCW" + }, + "outputs": [], + "source": [ + "@tf.experimental.dispatch_for_binary_elementwise_apis(MaskedTensor, MaskedTensor)\n", + "def masked_tensor_binary_elementwise_api_handler(api_func, x, y):\n", + " return MaskedTensor(api_func(x.values, y.values), x.mask & y.mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gghVHDfSfyi2" + }, + "outputs": [], + "source": [ + "x = MaskedTensor([1, -2, -3], [True, False, True])\n", + "y = MaskedTensor([[4], [5]], [[True], [False]])\n", + "tf.math.add(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "txTGg9pzG0Ux" + }, + "source": [ + "For a list of the elementwise APIs that are overridden, go to the API documentation for `tf.experimental.dispatch_for_unary_elementwise_apis` and `tf.experimental.dispatch_for_binary_elementwise_apis`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UseRtohYKiE5" + }, + "source": [ + "## Batchable `ExtensionType`s\n", + "\n", + "An `ExtensionType` is *batchable* if a single instance can be used to represent a batch of values. Typically, this is accomplished by adding batch dimensions to all nested `Tensor`s. The following TensorFlow APIs require that any extension type inputs be batchable:\n", + "\n", + "* `tf.data.Dataset` (`batch`, `unbatch`, `from_tensor_slices`)\n", + "* `tf.keras` (`fit`, `evaluate`, `predict`)\n", + "* `tf.map_fn`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hWPauKGj_yRz" + }, + "source": [ + "By default, `BatchableExtensionType` creates batched values by batching any nested `Tensor`s, `CompositeTensor`s, and `ExtensionType`s. If this is not appropriate for your class, then you will need to use `tf.experimental.ExtensionTypeBatchEncoder` to override this default behavior. 
For example, it would not be appropriate to create a batch of `tf.SparseTensor` values by simply stacking individual sparse tensors' `values`, `indices`, and `dense_shape` fields -- in most cases, you can't stack these tensors, since they have incompatible shapes; and even if you could, the result would not be a valid `SparseTensor`.\n", + "\n", + "\n", + "**Note**: `BatchableExtensionType`s do *not* automatically define dispatchers for `tf.stack`, `tf.concat`, `tf.slice`, etc. If your class needs to be supported by these APIs, then use the dispatch decorators described above." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xkOJ8ke8GH7s" + }, + "source": [ + "### `BatchableExtensionType` example: `Network`\n", + "As an example, consider a simple `Network` class used for load balancing, which tracks how much work is left to do at each node, and how much bandwidth is available to move work between nodes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tOeEXwCcfrPd" + }, + "outputs": [], + "source": [ + "class Network(tf.experimental.ExtensionType): # This version is not batchable.\n", + " work: tf.Tensor # work[n] = work left to do at node n\n", + " bandwidth: tf.Tensor # bandwidth[n1, n2] = bandwidth from n1->n2\n", + "\n", + "net1 = Network([5., 3, 8], [[0., 2, 0], [2, 0, 3], [0, 3, 0]])\n", + "net2 = Network([3., 4, 2], [[0., 2, 2], [2, 0, 2], [2, 2, 0]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PaOzUev6g3wT" + }, + "source": [ + "To make this type batchable, change the base type to `BatchableExtensionType`, and adjust the shape of each field to include optional batch dimensions. The following example also adds a `shape` field to keep track of the batch shape. This `shape` field is not required by `tf.data.Dataset` or `tf.map_fn`, but it *is* required by `tf.keras`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T03WWBSMg2XC" + }, + "outputs": [], + "source": [ + "class Network(tf.experimental.BatchableExtensionType):\n", + " shape: tf.TensorShape # batch shape. 
A single network has shape=[].\n", + " work: tf.Tensor # work[*shape, n] = work left to do at node n\n", + " bandwidth: tf.Tensor # bandwidth[*shape, n1, n2] = bandwidth from n1->n2\n", + "\n", + " def __init__(self, work, bandwidth):\n", + " self.work = tf.convert_to_tensor(work)\n", + " self.bandwidth = tf.convert_to_tensor(bandwidth)\n", + " work_batch_shape = self.work.shape[:-1]\n", + " bandwidth_batch_shape = self.bandwidth.shape[:-2]\n", + " self.shape = work_batch_shape.merge_with(bandwidth_batch_shape)\n", + "\n", + " def __repr__(self):\n", + " return network_repr(self)\n", + "\n", + "def network_repr(network):\n", + " work = network.work\n", + " bandwidth = network.bandwidth\n", + " if hasattr(work, 'numpy'):\n", + " work = ' '.join(str(work.numpy()).split())\n", + " if hasattr(bandwidth, 'numpy'):\n", + " bandwidth = ' '.join(str(bandwidth.numpy()).split())\n", + " return (f\"<Network shape={network.shape} work={work} bandwidth={bandwidth}>\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NUUJe9HuIPel" + }, + "outputs": [], + "source": [ + "net1 = Network([5., 3, 8], [[0., 2, 0], [2, 0, 3], [0, 3, 0]])\n", + "net2 = Network([3., 4, 2], [[0., 2, 2], [2, 0, 2], [2, 2, 0]])\n", + "batch_of_networks = Network(\n", + " work=tf.stack([net1.work, net2.work]),\n", + " bandwidth=tf.stack([net1.bandwidth, net2.bandwidth]))\n", + "print(f\"net1={net1}\")\n", + "print(f\"net2={net2}\")\n", + "print(f\"batch={batch_of_networks}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r0qWur5JGc3d" + }, + "source": [ + "You can then use `tf.data.Dataset` to iterate through a batch of networks:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BN_kixAUFZtv" + }, + "outputs": [], + "source": [ + "dataset = tf.data.Dataset.from_tensor_slices(batch_of_networks)\n", + "for i, network in enumerate(dataset):\n", + " print(f\"Batch element {i}: {network}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aXENhTzIIjbM" + }, + "source": [ + "And you can also use `map_fn` to apply a function to each batch element:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j1XEsSWj9a3D" + }, + "outputs": [], + "source": [ + "def balance_work_greedy(network):\n", + " delta = (tf.expand_dims(network.work, -1) - tf.expand_dims(network.work, -2))\n", + " delta /= 4\n", + " delta = tf.maximum(tf.minimum(delta, network.bandwidth), -network.bandwidth)\n", + " new_work = network.work + tf.reduce_sum(delta, -1)\n", + " return Network(new_work, network.bandwidth)\n", + "\n", + "tf.map_fn(balance_work_greedy, batch_of_networks)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f_HLsTT02Xul" + }, + "source": [ + "## TensorFlow APIs that support `ExtensionType`s" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NNiQad2U2alT" + }, + "source": [ + "### @tf.function\n", + "\n", + "[`tf.function`](https://www.tensorflow.org/guide/function) is a decorator that precomputes TensorFlow graphs for Python functions, which can substantially improve the performance of your TensorFlow code. Extension type values can be used transparently with `@tf.function`-decorated functions."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jQ_rAvrA6qEb" + }, + "outputs": [], + "source": [ + "class Pastry(tf.experimental.ExtensionType):\n", + " sweetness: tf.Tensor # 2d embedding that encodes sweetness\n", + " chewiness: tf.Tensor # 2d embedding that encodes chewiness\n", + "\n", + "@tf.function\n", + "def combine_pastry_features(x: Pastry):\n", + " return (x.sweetness + x.chewiness) / 2\n", + "\n", + "cookie = Pastry(sweetness=[1.2, 0.4], chewiness=[0.8, 0.2])\n", + "combine_pastry_features(cookie)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1P-0Udg71Vx" + }, + "source": [ + "If you wish to explicitly specify the `input_signature` for `tf.function`, then you can do so using the extension type's `TypeSpec`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0df90E4x78d7" + }, + "outputs": [], + "source": [ + "pastry_spec = Pastry.Spec(tf.TensorSpec([2]), tf.TensorSpec(2))\n", + "\n", + "@tf.function(input_signature=[pastry_spec])\n", + "def increase_sweetness(x: Pastry, delta=1.0):\n", + " return Pastry(x.sweetness + delta, x.chewiness)\n", + "\n", + "increase_sweetness(cookie)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CdTfc5nD9JpD" + }, + "source": [ + "#### Concrete functions\n", + "Concrete functions encapsulate individual traced graphs that are built by `tf.function`. Extension types can be used transparently with concrete functions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FyHBBQWk9xz2" + }, + "outputs": [], + "source": [ + "cf = combine_pastry_features.get_concrete_function(pastry_spec)\n", + "cf(cookie)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LYas8gtG5IMA" + }, + "source": [ + "### Control flow operations\n", + "\n", + "Extension types are supported by TensorFlow's control-flow operations:\n", + "\n", + "* `tf.cond`\n", + "* `tf.case`\n", + "* `tf.while_loop`\n", + "* `tf.identity`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6G2XE9ZtJu8z" + }, + "outputs": [], + "source": [ + "# Example: using tf.cond to select between two MaskedTensors. Note that the\n", + "# two MaskedTensors don't need to have the same shape.\n", + "a = MaskedTensor([1., 2, 3], [True, False, True])\n", + "b = MaskedTensor([22., 33, 108, 55], [True, True, True, False])\n", + "condition = tf.constant(True)\n", + "print(tf.cond(condition, lambda: a, lambda: b))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2NwLOw1kKSek" + }, + "outputs": [], + "source": [ + "# Example: using tf.while_loop with MaskedTensor.\n", + "cond = lambda i, _: i < 10\n", + "def body(i, mt):\n", + " return i + 1, mt.with_values(mt.values + 3 / 7)\n", + "print(tf.while_loop(cond, body, [0, b])[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zkN7IuWVMRzn" + }, + "source": [ + "### Autograph control flow\n", + "\n", + "Extension types are also supported by control flow statements in `tf.function` (using autograph). In the following example, the `if` statement and `for` statements are automatically converted to `tf.cond` and `tf.while_loop` operations, which support extension types." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4RFySEl8gZ8w" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def fn(x, b):\n", + " if b:\n", + " x = MaskedTensor(x, tf.less(x, 0))\n", + " else:\n", + " x = MaskedTensor(x, tf.greater(x, 0))\n", + " for i in tf.range(5 if b else 7):\n", + " x = x.with_values(x.values + 1 / 2)\n", + " return x\n", + "\n", + "print(fn(tf.constant([1., -2, 3]), tf.constant(True)))\n", + "print(fn(tf.constant([1., -2, 3]), tf.constant(False)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-FjZt2ohfja4" + }, + "source": [ + "### Keras\n", + "\n", + "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. Extension types may be passed as inputs to a Keras model, passed between Keras layers, and returned by Keras models. Keras currently puts two requirements on extension types:\n", + "\n", + "* They must be batchable (go to \"Batchable `ExtensionType`s\" above).\n", + "* They must have a field or property named `shape`. `shape[0]` is assumed to be the batch dimension.\n", + "\n", + "The following two subsections give examples showing how extension types can be used with Keras.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QH1TXQYiGv8u" + }, + "source": [ + "#### Keras example: `Network`\n", + "\n", + "For the first example, consider the `Network` class defined in the \"Batchable `ExtensionType`s\" section above, which can be used for load balancing work between nodes. Its definition is repeated here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zHj1RIS2PK50" + }, + "outputs": [], + "source": [ + "class Network(tf.experimental.BatchableExtensionType):\n", + " shape: tf.TensorShape # batch shape. A single network has shape=[].\n", + " work: tf.Tensor # work[*shape, n] = work left to do at node n\n", + " bandwidth: tf.Tensor # bandwidth[*shape, n1, n2] = bandwidth from n1->n2\n", + "\n", + " def __init__(self, work, bandwidth):\n", + " self.work = tf.convert_to_tensor(work)\n", + " self.bandwidth = tf.convert_to_tensor(bandwidth)\n", + " work_batch_shape = self.work.shape[:-1]\n", + " bandwidth_batch_shape = self.bandwidth.shape[:-2]\n", + " self.shape = work_batch_shape.merge_with(bandwidth_batch_shape)\n", + "\n", + " def __repr__(self):\n", + " return network_repr(self)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w9LPTEVJD0FD" + }, + "outputs": [], + "source": [ + "single_network = Network( # A single network with 4 nodes.\n", + " work=[8.0, 5, 12, 2],\n", + " bandwidth=[[0.0, 1, 2, 2], [1, 0, 0, 2], [2, 0, 0, 1], [2, 2, 1, 0]])\n", + "\n", + "batch_of_networks = Network( # Batch of 2 networks, each w/ 2 nodes.\n", + " work=[[8.0, 5], [3, 2]],\n", + " bandwidth=[[[0.0, 1], [1, 0]], [[0, 2], [2, 0]]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IUfWi3SDD0dj" + }, + "source": [ + "You can define a new Keras layer that processes `Network`s." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2WSYt58r4SF1" + }, + "outputs": [], + "source": [ + "class BalanceNetworkLayer(tf.keras.layers.Layer):\n", + " \"\"\"Layer that balances work between nodes in a network.\n", + "\n", + " Shifts work from more busy nodes to less busy nodes, constrained by bandwidth.\n", + " \"\"\"\n", + " def call(self, inputs):\n", + " # This function is defined above in the \"Batchable `ExtensionType`s\" section.\n", + " return balance_work_greedy(inputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VWwFJNb1E03q" + }, + "source": [ + "You can then use these layers to create a simple model. To feed an `ExtensionType` into a model, you can use a `tf.keras.layer.Input` layer with `type_spec` set to the extension type's `TypeSpec`. If the Keras model will be used to process batches, then the `type_spec` must include the batch dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "plTyqISRExA4" + }, + "outputs": [], + "source": [ + "input_spec = Network.Spec(shape=None,\n", + " work=tf.TensorSpec(None, tf.float32),\n", + " bandwidth=tf.TensorSpec(None, tf.float32))\n", + "model = tf.keras.Sequential([\n", + " tf.keras.layers.Input(type_spec=input_spec),\n", + " BalanceNetworkLayer(),\n", + " ])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hyeAbt1WFIiO" + }, + "source": [ + "Finally, you can apply the model to a single network and to a batch of networks." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hH1EtA5lFHdN" + }, + "outputs": [], + "source": [ + "model(single_network)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V7eM67M7FYYM" + }, + "outputs": [], + "source": [ + "model(batch_of_networks)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tOxtt9Z1HDCv" + }, + "source": [ + "#### Keras example: MaskedTensor\n", + "\n", + "In this example, `MaskedTensor` is extended to support `Keras`. `shape` is defined as a property that is calculated from the `values` field. Keras requires that you add this property to both the extension type and its `TypeSpec`. `MaskedTensor` also defines a `__name__` variable, which will be required for `SavedModel` serialization (below)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1JBZ_t48Ht7e" + }, + "outputs": [], + "source": [ + "class MaskedTensor(tf.experimental.BatchableExtensionType):\n", + " # __name__ is required for serialization in SavedModel; see below for details.\n", + " __name__ = 'extension_type_colab.MaskedTensor'\n", + "\n", + " values: tf.Tensor\n", + " mask: tf.Tensor\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)\n", + "\n", + " def with_default(self, default):\n", + " return tf.where(self.mask, self.values, default)\n", + "\n", + " def __repr__(self):\n", + " return masked_tensor_str(self.values, self.mask)\n", + "\n", + " class Spec:\n", + " def __init__(self, shape, dtype=tf.float32):\n", + " self.values = tf.TensorSpec(shape, dtype)\n", + " self.mask = tf.TensorSpec(shape, tf.bool)\n", + "\n", + " shape = property(lambda self: self.values.shape)\n", + " dtype = property(lambda self: self.values.dtype)\n", + "\n", + " def with_shape(self):\n", + " return MaskedTensor.Spec(tf.TensorSpec(shape, self.values.dtype),\n", + " tf.TensorSpec(shape, self.mask.dtype))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oer8BVc8H7_V" + }, + "source": [ + "Next, the dispatch decorators are used to override the default behavior of several TensorFlow APIs. Since these APIs are used by standard Keras layers (such as the `Dense` layer), overriding these will allow us to use those layers with `MaskedTensor`. For the purposes of this example, `matmul` for masked tensors is defined to treat the masked values as zeros (that is, to not include them in the product)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xy0dhQ_b-ca_" + }, + "outputs": [], + "source": [ + "@tf.experimental.dispatch_for_unary_elementwise_apis(MaskedTensor)\n", + "def unary_elementwise_op_handler(op, x):\n", + " return MaskedTensor(op(x.values), x.mask)\n", + "\n", + "@tf.experimental.dispatch_for_binary_elementwise_apis(\n", + " Union[MaskedTensor, tf.Tensor],\n", + " Union[MaskedTensor, tf.Tensor])\n", + "def binary_elementwise_op_handler(op, x, y):\n", + " x = convert_to_masked_tensor(x)\n", + " y = convert_to_masked_tensor(y)\n", + " return MaskedTensor(op(x.values, y.values), x.mask & y.mask)\n", + "\n", + "@tf.experimental.dispatch_for_api(tf.matmul)\n", + "def masked_matmul(a: MaskedTensor, b,\n", + " transpose_a=False, transpose_b=False,\n", + " adjoint_a=False, adjoint_b=False,\n", + " a_is_sparse=False, b_is_sparse=False,\n", + " output_type=None,\n", + " grad_a=False, grad_b=False,\n", + " name=None,\n", + " ):\n", + " if isinstance(a, MaskedTensor):\n", + " a = a.with_default(0)\n", + " if isinstance(b, MaskedTensor):\n", + " b = b.with_default(0)\n", + " return tf.matmul(a, b, transpose_a, transpose_b, adjoint_a,\n", + " adjoint_b, a_is_sparse, b_is_sparse,\n", + " output_type)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osJ_L-fKJusI" + }, + "source": [ + "You can then construct a Keras model that accepts `MaskedTensor` inputs, using standard Keras layers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IS6JCVbk1rd0" + }, + "outputs": [], + "source": [ + "input_spec = MaskedTensor.Spec([None, 2], tf.float32)\n", + "\n", + "masked_tensor_model = tf.keras.Sequential([\n", + " tf.keras.layers.Input(type_spec=input_spec),\n", + " tf.keras.layers.Dense(16, activation=\"relu\"),\n", + " 
tf.keras.layers.Dense(1)])\n", + "masked_tensor_model.compile(loss='binary_crossentropy', optimizer='rmsprop')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SB1WUSzn1RPj" + }, + "outputs": [], + "source": [ + "a = MaskedTensor([[1., 2], [3, 4], [5, 6]],\n", + " [[True, False], [False, True], [True, True]])\n", + "masked_tensor_model.fit(a, tf.constant([[1], [0], [1]]), epochs=3)\n", + "print(masked_tensor_model(a))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "msmd9XcL2bqb" + }, + "source": [ + "### SavedModel\n", + "\n", + "A [SavedModel](https://www.tensorflow.org/guide/saved_model) is a serialized TensorFlow program, including both weights and computation. It can be built from a Keras model or from a custom model. In either case, extension types can be used transparently with the functions and methods defined by a SavedModel.\n", + "\n", + "SavedModel can save models, layers, and functions that process extension types, as long as the extension types have a `__name__` field. This name is used to register the extension type, so it can be located when the model is loaded." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PEtbFrz6-Vku" + }, + "source": [ + "#### Example: saving a Keras model\n", + "\n", + "Keras models that use extension types may be saved using `SavedModel`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ecxQMnybSzV6" + }, + "outputs": [], + "source": [ + "masked_tensor_model_path = tempfile.mkdtemp()\n", + "tf.saved_model.save(masked_tensor_model, masked_tensor_model_path)\n", + "imported_model = tf.saved_model.load(masked_tensor_model_path)\n", + "imported_model(a)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ne2nu3r6-XMr" + }, + "source": [ + "#### Example: saving a custom model\n", + "\n", + "SavedModel can also be used to save custom `tf.Module` subclasses with functions that process extension types." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2V6hV3yOT2vz" + }, + "outputs": [], + "source": [ + "class CustomModule(tf.Module):\n", + " def __init__(self, variable_value):\n", + " super().__init__()\n", + " self.v = tf.Variable(variable_value)\n", + "\n", + " @tf.function\n", + " def grow(self, x: MaskedTensor):\n", + " \"\"\"Increase values in `x` by multiplying them by `self.v`.\"\"\"\n", + " return MaskedTensor(x.values * self.v, x.mask)\n", + "\n", + "module = CustomModule(100.0)\n", + "\n", + "module.grow.get_concrete_function(MaskedTensor.Spec(shape=None,\n", + " dtype=tf.float32))\n", + "custom_module_path = tempfile.mkdtemp()\n", + "tf.saved_model.save(module, custom_module_path)\n", + "imported_model = tf.saved_model.load(custom_module_path)\n", + "imported_model.grow(MaskedTensor([1., 2, 3], [False, True, False]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o6beljh576ee" + }, + "source": [ + "#### Loading a SavedModel when the `ExtensionType` is unavailable\n", + "\n", + "If you load a `SavedModel` that uses an `ExtensionType`, but that `ExtensionType` is not available (that is, it has not been imported), then you will get a warning and TensorFlow will fall back to using an \"anonymous extension type\" object. This object will have the same fields as the original type, but will lack any further customization you have added for the type, such as custom methods or properties." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ec9PcUkJ9bFK" + }, + "source": [ + "#### Using `ExtensionType`s with TensorFlow Serving\n", + "\n", + "Currently, [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) (and other consumers of the SavedModel \"signatures\" dictionary) require that all inputs and outputs be raw tensors. If you wish to use TensorFlow Serving with a model that uses extension types, then you can add wrapper methods that compose or decompose extension type values from tensors. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4VnzAwVo9tTc" + }, + "outputs": [], + "source": [ + "class CustomModuleWrapper(tf.Module):\n", + " def __init__(self, variable_value):\n", + " super().__init__()\n", + " self.v = tf.Variable(variable_value)\n", + "\n", + " @tf.function\n", + " def var_weighted_mean(self, x: MaskedTensor):\n", + " \"\"\"Mean value of unmasked values in x, weighted by self.v.\"\"\"\n", + " x = MaskedTensor(x.values * self.v, x.mask)\n", + " return (tf.reduce_sum(x.with_default(0)) /\n", + " tf.reduce_sum(tf.cast(x.mask, x.dtype)))\n", + "\n", + " @tf.function()\n", + " def var_weighted_mean_wrapper(self, x_values, x_mask):\n", + " \"\"\"Raw tensor wrapper for var_weighted_mean.\"\"\"\n", + " return self.var_weighted_mean(MaskedTensor(x_values, x_mask))\n", + "\n", + "module = CustomModuleWrapper([3., 2., 8., 5.])\n", + "\n", + "module.var_weighted_mean_wrapper.get_concrete_function(\n", + " tf.TensorSpec(None, tf.float32), tf.TensorSpec(None, tf.bool))\n", + "custom_module_path = tempfile.mkdtemp()\n", + "tf.saved_model.save(module, custom_module_path)\n", + "imported_model = tf.saved_model.load(custom_module_path)\n", + "x = MaskedTensor([1., 2., 3., 4.], [False, True, False, True])\n", + "imported_model.var_weighted_mean_wrapper(x.values, x.mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4dwBadWQ5G9_" + }, + "source": [ + "### `Dataset`s\n", + "\n", + "[`tf.data`](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcIR19FuwRJV" + }, + "source": [ + "#### Building `Dataset`s with extension types\n", + "\n", + "Datasets can be built from extension type values using `Dataset.from_tensors`, `Dataset.from_tensor_slices`, or `Dataset.from_generator`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Oe7fRCkzwdub" + }, + "outputs": [], + "source": [ + "ds = tf.data.Dataset.from_tensors(Pastry(5, 5))\n", + "iter(ds).next()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fk9CD2fZx6yT" + }, + "outputs": [], + "source": [ + "mt = MaskedTensor(tf.reshape(range(20), [5, 4]), tf.ones([5, 4]))\n", + "ds = tf.data.Dataset.from_tensor_slices(mt)\n", + "for value in ds:\n", + " print(value)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DGw8y87awsOJ" + }, + "outputs": [], + "source": [ + "def value_gen():\n", + " for i in range(2, 7):\n", + " yield MaskedTensor(range(10), [j%i != 0 for j in range(10)])\n", + "\n", + "ds = tf.data.Dataset.from_generator(\n", + " value_gen, output_signature=MaskedTensor.Spec(shape=[10], dtype=tf.int32))\n", + "for value in ds:\n", + " print(value)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wfEm4NInyqtj" + }, + "source": [ + "#### Batching and unbatching `Dataset`s with extension types\n", + "\n", + "Datasets with extension types can be batched and unbatched using `Dataset.batch` and `Dataset.unbatch`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "snoOUE1ay1rO" + }, + "outputs": [], + "source": [ + "batched_ds = ds.batch(2)\n", + "for value in batched_ds:\n", + " print(value)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f8PTky6EzBVY" + }, + "outputs": [], + "source": [ + "unbatched_ds = batched_ds.unbatch()\n", + "for value in unbatched_ds:\n", + " print(value)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "extension_type.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/function.ipynb b/site/en/guide/function.ipynb index 7a97c8337b0..f4677f21eb8 100644 --- a/site/en/guide/function.ipynb +++ b/site/en/guide/function.ipynb @@ -61,7 +61,7 @@ "id": "J122XQYG7W6w" }, "source": [ - "In TensorFlow 2, [eager execution](eager.ipynb) is turned on by default. The user interface is intuitive and flexible (running one-off operations is much easier and faster), but this can come at the expense of performance and deployability.\n", + "In TensorFlow 2, [eager execution](basics.ipynb) is turned on by default. The user interface is intuitive and flexible (running one-off operations is much easier and faster), but this can come at the expense of performance and deployability.\n", "\n", "You can use `tf.function` to make graphs out of your programs. It is a transformation tool that creates Python-independent dataflow graphs out of your Python code. This will help you create performant and portable models, and it is required to use `SavedModel`.\n", "\n", @@ -146,7 +146,7 @@ "source": [ "### Usage\n", "\n", - "A `Function` you define (for example by applying the `@tf.function` decorator) is just like a core TensorFlow operation: You can execute it eagerly; you can compute gradients; and so on."
+ "A `tf.function` that you define (for example by applying the `@tf.function` decorator) is just like a core TensorFlow operation: You can execute it eagerly; you can compute gradients; and so on." ] }, { @@ -157,7 +157,7 @@ }, "outputs": [], "source": [ - "@tf.function # The decorator converts `add` into a `Function`.\n", + "@tf.function # The decorator converts `add` into a `PolymorphicFunction`.\n", "def add(a, b):\n", " return a + b\n", "\n", @@ -184,7 +184,7 @@ "id": "ocWZvqrmHnmX" }, "source": [ - "You can use `Function`s inside other `Function`s." + "You can use `tf.function`s inside other `tf.function`s." ] }, { @@ -208,7 +208,7 @@ "id": "piBhz7gYsHqU" }, "source": [ - "`Function`s can be faster than eager code, especially for graphs with many small ops. But for graphs with a few expensive ops (like convolutions), you may not see much speedup.\n" + "`tf.function`s can be faster than eager code, especially for graphs with many small ops. But for graphs with a few expensive ops (like convolutions), you may not see much speedup.\n" ] }, { @@ -242,7 +242,7 @@ "source": [ "### Tracing\n", "\n", - "This section exposes how `Function` works under the hood, including implementation details *which may change in the future*. However, once you understand why and when tracing happens, it's much easier to use `tf.function` effectively!" + "This section exposes how `tf.function` works under the hood, including implementation details *which may change in the future*. However, once you understand why and when tracing happens, it's much easier to use `tf.function` effectively!" ] }, { @@ -253,17 +253,17 @@ "source": [ "#### What is \"tracing\"?\n", "\n", - "A `Function` runs your program in a [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs#what_are_graphs). However, a `tf.Graph` cannot represent all the things that you'd write in an eager TensorFlow program. For instance, Python supports polymorphism, but `tf.Graph` requires its inputs to have a specified data type and dimension. Or you may perform side tasks like reading command-line arguments, raising an error, or working with a more complex Python object; none of these things can run in a `tf.Graph`.\n", + "A `tf.function` runs your program in a [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs#what_are_graphs). However, a `tf.Graph` cannot represent all the things that you'd write in an eager TensorFlow program. For instance, Python supports polymorphism, but `tf.Graph` requires its inputs to have a specified data type and dimension. Or you may perform side tasks like reading command-line arguments, raising an error, or working with a more complex Python object; none of these things can run in a `tf.Graph`.\n", "\n", - "`Function` bridges this gap by separating your code in two stages:\n", + "`tf.function` bridges this gap by separating your code in two stages:\n", "\n", - " 1) In the first stage, referred to as \"**tracing**\", `Function` creates a new `tf.Graph`. Python code runs normally, but all TensorFlow operations (like adding two Tensors) are *deferred*: they are captured by the `tf.Graph` and not run.\n", + " 1) In the first stage, referred to as \"**tracing**\", `tf.function` creates a new `tf.Graph`. Python code runs normally, but all TensorFlow operations (like adding two Tensors) are *deferred*: they are captured by the `tf.Graph` and not run.\n", "\n", " 2) In the second stage, a `tf.Graph` which contains everything that was deferred in the first stage is run. 
This stage is much faster than the tracing stage.\n", "\n", - "Depending on its inputs, `Function` will not always run the first stage when it is called. See [\"Rules of tracing\"](#rules_of_tracing) below to get a better sense of how it makes that determination. Skipping the first stage and only executing the second stage is what gives you TensorFlow's high performance.\n", + "Depending on its inputs, `tf.function` will not always run the first stage when it is called. See [\"Rules of tracing\"](#rules_of_tracing) below to get a better sense of how it makes that determination. Skipping the first stage and only executing the second stage is what gives you TensorFlow's high performance.\n", "\n", - "When `Function` does decide to trace, the tracing stage is immediately followed by the second stage, so calling the `Function` both creates and runs the `tf.Graph`. Later you will see how you can run only the tracing stage with [`get_concrete_function`](#obtaining_concrete_functions)." + "When `tf.function` does decide to trace, the tracing stage is immediately followed by the second stage, so calling the `tf.function` both creates and runs the `tf.Graph`. Later you will see how you can run only the tracing stage with [`get_concrete_function`](#obtaining_concrete_functions)." ] }, { @@ -272,7 +272,7 @@ "id": "K7scSzLx662f" }, "source": [ - "When we pass arguments of different types into a `Function`, both stages are run:\n" + "When you pass arguments of different types into a `tf.function`, both stages are run:\n" ] }, { @@ -302,7 +302,7 @@ "id": "QPfouGUQrcNb" }, "source": [ - "Note that if you repeatedly call a `Function` with the same argument type, TensorFlow will skip the tracing stage and reuse a previously traced graph, as the generated graph would be identical." + "Note that if you repeatedly call a `tf.function` with the same argument type, TensorFlow will skip the tracing stage and reuse a previously traced graph, as the generated graph would be identical." ] }, { @@ -346,10 +346,11 @@ "So far, you've seen that `tf.function` creates a cached, dynamic dispatch layer over TensorFlow's graph tracing logic. To be more specific about the terminology:\n", "\n", "- A `tf.Graph` is the raw, language-agnostic, portable representation of a TensorFlow computation.\n", - "- A `ConcreteFunction` wraps a `tf.Graph`.\n", - "- A `Function` manages a cache of `ConcreteFunction`s and picks the right one for your inputs.\n", - "- `tf.function` wraps a Python function, returning a `Function` object.\n", - "- **Tracing** creates a `tf.Graph` and wraps it in a `ConcreteFunction`, also known as a **trace.**\n" + "- Tracing is the process through which new `tf.Graph`s are generated from Python code.\n", + "- An instance of `tf.Graph` is specialized to the specific input types it was traced with. Differing types require retracing.\n", + "- Each traced `tf.Graph` has a corresponding `ConcreteFunction`.\n", + "- A `tf.function` manages a cache of `ConcreteFunction`s and picks the right one for your inputs.\n", + "- `tf.function` wraps the Python function that will be traced, returning a `tf.types.experimental.PolymorphicFunction` object.\n" ] }, { @@ -360,20 +361,22 @@ "source": [ "#### Rules of tracing\n", "\n", - "A `Function` determines whether to reuse a traced `ConcreteFunction` by computing a **cache key** from an input's args and kwargs. 
A **cache key** is a key that identifies a `ConcreteFunction` based on the input args and kwargs of the `Function` call, according to the following rules (which may change):\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h62XoXho6EWN" - }, - "source": [ - "- The key generated for a `tf.Tensor` is its shape and dtype.\n", - "- The key generated for a `tf.Variable` is a unique variable id.\n", - "- The key generated for a Python primitive (like `int`, `float`, `str`) is its value. \n", - "- The key generated for nested `dict`s, `list`s, `tuple`s, `namedtuple`s, and [`attr`](https://www.attrs.org/en/stable/)s is the flattened tuple of leaf-keys (see `nest.flatten`). (As a result of this flattening, calling a concrete function with a different nesting structure than the one used during tracing will result in a TypeError).\n", - "- For all other Python types the key is unique to the object. This way a function or method is traced independently for each instance it is called with.\n" + "When called, a `tf.function` first evaluates the type of each input argument using the `tf.types.experimental.TraceType` of each argument. This is used to construct a `tf.types.experimental.FunctionType` describing the signature of the desired `ConcreteFunction`. We compare this `FunctionType` to the `FunctionType`s of existing `ConcreteFunction`s. If a matching `ConcreteFunction` is found, the call is dispatched to it. If no match is found, a new `ConcreteFunction` is traced for the desired `FunctionType`.\n", + "\n", + "If multiple matches are found, the most specific signature is chosen. Matching is done by [subtyping](https://en.wikipedia.org/wiki/Subtyping), much like normal function calls in C++ or Java, for instance. For example, `TensorShape([1, 2])` is a subtype of `TensorShape([None, None])` and so a call to the tf.function with `TensorShape([1, 2])` can be dispatched to the `ConcreteFunction` produced with `TensorShape([None, None])` but if a `ConcreteFunction` with `TensorShape([1, None])` also exists then it will be prioritized since it is more specific.\n", + "\n", + "The `TraceType` is determined from input arguments as follows:\n", + "* For `Tensor`, the type is parameterized by the `Tensor`'s `dtype` and `shape`; ranked shapes are a subtype of unranked shapes; fixed dimensions are a subtype of unknown dimensions\n", + "* For `Variable`, the type is similar to `Tensor`, but also includes a unique resource ID of the variable, necessary to correctly wire control dependencies\n", + "* For Python primitive values, the type corresponds to the **value** itself. For example, the `TraceType` of the value `3` is `LiteralTraceType<3>`, not `int`.\n", + "* For Python ordered containers such as `list` and `tuple`, etc., the type is parameterized by the types of their elements; for example, the type of `[1, 2]` is `ListTraceType<LiteralTraceType<1>, LiteralTraceType<2>>` and the type for `[2, 1]` is `ListTraceType<LiteralTraceType<2>, LiteralTraceType<1>>` which is different.\n", + "* For Python mappings such as `dict`, the type is also a mapping from the same keys but to the types of values instead of the actual values. For example, the type of `{1: 2, 3: 4}`, is `MappingTraceType<<KeyValue<1, LiteralTraceType<2>>>, <KeyValue<3, LiteralTraceType<4>>>>`. 
However, unlike ordered containers, `{1: 2, 3: 4}` and `{3: 4, 1: 2}` have equivalent types.\n", + "* For Python objects which implement the `__tf_tracing_type__` method, the type is whatever that method returns.\n", + "* For any other Python objects, the type is a generic `TraceType`, and the matching procedure is:\n", + " * First it checks if the object is the same object used in the previous trace (using Python `id()` or `is`). Note that this will still match if the object has changed, so if you use Python objects as `tf.function` arguments it's best to use *immutable* ones.\n", + " * Next it checks if the object is equal to the object used in the previous trace (using Python `==`).\n", + " \n", + " Note that this procedure only keeps a [weakref](https://docs.python.org/3/library/weakref.html) to the object and hence only works as long as the object is in scope/not deleted.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "GNNN4lgRzpIs" }, "source": [ - "Note: Cache keys are based on the `Function` input parameters so changes to global and [free variables](https://docs.python.org/3/reference/executionmodel.html#binding-of-names) alone will not create a new trace. See [this section](#depending_on_python_global_and_free_variables) for recommended practices when dealing with Python global and free variables." + "Note: `TraceType` is based on the `tf.function` input parameters so changes to global and [free variables](https://docs.python.org/3/reference/executionmodel.html#binding-of-names) alone will not create a new trace. See [this section](#depending_on_python_global_and_free_variables) for recommended practices when dealing with Python global and free variables." ] }, { "cell_type": "markdown", "metadata": { "id": "PEDwbumO32Wh" }, "source": [ - "#### Controlling retracing\n", + "### Controlling retracing\n", "\n", - "Retracing, which is when your `Function` creates more than one trace, helps ensures that TensorFlow generates correct graphs for each set of inputs. However, tracing is an expensive operation! If your `Function` retraces a new graph for every call, you'll find that your code executes more slowly than if you didn't use `tf.function`.\n", + "Retracing, which is when your `tf.function` creates more than one trace, helps ensure that TensorFlow generates correct graphs for each set of inputs. However, tracing is an expensive operation! If your `tf.function` retraces a new graph for every call, you'll find that your code executes more slowly than if you didn't use `tf.function`.\n", "\n", "To control the tracing behavior, you can use the following techniques:" ] }, { "cell_type": "markdown", "metadata": { "id": "EUtycWJa34TT" }, "source": [ - "- Specify `input_signature` in `tf.function` to limit tracing." + "#### Pass a fixed `input_signature` to `tf.function`\n", + "\n", + "This forces `tf.function` to constrain itself to only one `tf.types.experimental.FunctionType` composed of the types enumerated by the `input_signature`. Calls that cannot be dispatched to this `FunctionType` will throw an error."
] }, { @@ -422,11 +427,11 @@ "\n", "print(next_collatz(tf.constant([1, 2])))\n", "# You specified a 1-D tensor in the input signature, so this should fail.\n", - "with assert_raises(ValueError):\n", + "with assert_raises(TypeError):\n", " next_collatz(tf.constant([[1, 2], [3, 4]]))\n", "\n", "# You specified an int32 dtype in the input signature, so this should fail.\n", - "with assert_raises(ValueError):\n", + "with assert_raises(TypeError):\n", " next_collatz(tf.constant([1.0, 2.0]))\n" ] }, @@ -436,9 +441,9 @@ "id": "ocxX-HVk7P2o" }, "source": [ - "- Specify a \\[None\\] dimension in `tf.TensorSpec` to allow for flexibility in trace reuse.\n", + "#### Use unknown dimensions for flexibility\n", "\n", - " Since TensorFlow matches tensors based on their shape, using a `None` dimension as a wildcard will allow `Function`s to reuse traces for variably-sized input. Variably-sized input can occur if you have sequences of different length, or images of different sizes for each batch (See the [Transformer](../tutorials/text/transformer.ipynb) and [Deep Dream](../tutorials/generative/deepdream.ipynb) tutorials for example)." + " Since TensorFlow matches tensors based on their shape, using a `None` dimension as a wildcard will allow `tf.function`s to reuse traces for variably-sized input. Variably-sized input can occur if you have sequences of different length, or images of different sizes for each batch. You can check out the [Transformer](https://www.tensorflow.org/text/tutorials/transformer) and [Deep Dream](../tutorials/generative/deepdream.ipynb) tutorials for examples." ] }, { @@ -459,13 +464,48 @@ "print(g(tf.constant([1, 2, 3, 4, 5])))\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "37cc12f93cbd" + }, + "source": [ + "#### Use `reduce_retracing` for automatic flexibility\n", + "\n", + "When `reduce_retracing` is enabled, `tf.function` automatically identifies supertypes of the input types it is observing and chooses to trace more generalized graphs automatically. It is less efficient than setting the `input_signature` directly but useful when many types need to be supported." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0403fae03a1f" + }, + "outputs": [], + "source": [ + "@tf.function(reduce_retracing=True)\n", + "def g(x):\n", + " print('Tracing with', x)\n", + " return x\n", + "\n", + "# Traces once.\n", + "print(g(tf.constant([1, 2, 3])))\n", + "\n", + "# Traces again, but more generalized this time.\n", + "print(g(tf.constant([1, 2, 3, 4, 5])))\n", + "\n", + "# No more tracing!\n", + "print(g(tf.constant([1, 2, 3, 4, 5, 6, 7])))\n", + "print(g(tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9])))" + ] + }, { "cell_type": "markdown", "metadata": { "id": "AY5oiQN0XIyA" }, "source": [ - "- Cast Python arguments to Tensors to reduce retracing.\n", + "#### Pass tensors instead of python literals\n", "\n", " Often, Python arguments are used to control hyperparameters and graph constructions - for example, `num_layers=10` or `training=True` or `nonlinearity='relu'`. So, if the Python argument changes, it makes sense that you'd have to retrace the graph.\n", "\n", @@ -506,7 +546,7 @@ "id": "4pJqkDR_Q2wz" }, "source": [ - "If you need to force retracing, create a new `Function`. Separate `Function` objects are guaranteed not to share traces." + "If you need to force retracing, create a new `tf.function`. Separate `tf.function` objects are guaranteed not to share traces." 
] }, { @@ -525,6 +565,92 @@ "tf.function(f)()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "-tZoWrA6INvc" + }, + "source": [ + "#### Use the tracing protocol\n", + "\n", + "Where possible, you should prefer converting the Python type into a `tf.experimental.ExtensionType` instead. Moreover, the `TraceType` of an `ExtensionType` is the `tf.TypeSpec` associated with it. Therefore, if needed, you can simply override the default `tf.TypeSpec` to take control of an `ExtensionType`'s `Tracing Protocol`. Refer to the _Customizing the ExtensionType's TypeSpec_ section in the [Extension types](extension_type.ipynb) guide for details.\n", + "\n", + "Otherwise, for direct control over when `tf.function` should retrace with regard to a particular Python type, you can implement the `Tracing Protocol` for it yourself." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gZkIh7UaIKc6" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def get_mixed_flavor(fruit_a, fruit_b):\n", + " return fruit_a.flavor + fruit_b.flavor\n", + "\n", + "class Fruit:\n", + " flavor = tf.constant([0, 0])\n", + "\n", + "class Apple(Fruit):\n", + " flavor = tf.constant([1, 2])\n", + "\n", + "class Mango(Fruit):\n", + " flavor = tf.constant([3, 4])\n", + "\n", + "# As described in the above rules, a generic TraceType for `Apple` and `Mango`\n", + "# is generated (and a corresponding ConcreteFunction is traced) but it fails to\n", + "# match the second function call since the first pair of Apple() and Mango()\n", + "# have gone out of scope by then and been deleted.\n", + "get_mixed_flavor(Apple(), Mango()) # Traces a new concrete function\n", + "get_mixed_flavor(Apple(), Mango()) # Traces a new concrete function again\n", + "\n", + "# However, each subclass of the `Fruit` class has a fixed flavor, and you\n", + "# can reuse an existing traced concrete function if it was the same\n", + "# subclass. 
Avoiding such unnecessary tracing of concrete functions\n", + "# can have significant performance benefits.\n", + "\n", + "class FruitTraceType(tf.types.experimental.TraceType):\n", + " def __init__(self, fruit):\n", + " self.fruit_type = type(fruit)\n", + " self.fruit_value = fruit\n", + "\n", + " def is_subtype_of(self, other):\n", + " # True if self subtypes `other` and `other`'s type matches FruitTraceType.\n", + " return (type(other) is FruitTraceType and\n", + " self.fruit_type is other.fruit_type)\n", + "\n", + " def most_specific_common_supertype(self, others):\n", + " # `self` is the specific common supertype if all input types match it.\n", + " return self if all(self == other for other in others) else None\n", + "\n", + " def placeholder_value(self, placeholder_context=None):\n", + " # Use the fruit itself instead of the type for correct tracing.\n", + " return self.fruit_value\n", + "\n", + " def __eq__(self, other):\n", + " return type(other) is FruitTraceType and self.fruit_type == other.fruit_type\n", + "\n", + " def __hash__(self):\n", + " return hash(self.fruit_type)\n", + "\n", + "class FruitWithTraceType:\n", + "\n", + " def __tf_tracing_type__(self, context):\n", + " return FruitTraceType(self)\n", + "\n", + "class AppleWithTraceType(FruitWithTraceType):\n", + " flavor = tf.constant([1, 2])\n", + "\n", + "class MangoWithTraceType(FruitWithTraceType):\n", + " flavor = tf.constant([3, 4])\n", + "\n", + "# Now if you try calling it again:\n", + "get_mixed_flavor(AppleWithTraceType(), MangoWithTraceType()) # Traces a new concrete function\n", + "get_mixed_flavor(AppleWithTraceType(), MangoWithTraceType()) # Re-uses the traced concrete function" + ] + }, { "cell_type": "markdown", "metadata": { @@ -601,8 +727,7 @@ }, "outputs": [], "source": [ - "print(double_strings.structured_input_signature)\n", - "print(double_strings.structured_outputs)" + "print(double_strings.function_type)" ] }, { @@ -673,7 +798,7 @@ "source": [ "### Obtaining graphs\n", "\n", - "Each concrete function is a callable wrapper around a `tf.Graph`. Although retrieving the actual `tf.Graph` object is not something you'll normally need to do, you can obtain it easily from any concrete function." + "Although retrieving the actual `tf.Graph` object is not something you'll normally need to do, you can obtain it easily from any concrete function." ] }, { @@ -689,6 +814,36 @@ " print(f'{node.input} -> {node.name}')\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "2d49c486ccd4" + }, + "source": [ + "In reality, `tf.Graph`s are not directly callable. We actually use an `tf.types.experimental.AtomicFunction` to perform the computations described by the `tf.Graph`. You can access the `AtomicFunction` describing the traced `tf.Graph` and call it directly instead of the `ConcreteFunction`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4c3879aa0be0" + }, + "outputs": [], + "source": [ + "atomic_fn = double_strings.inference_fn\n", + "atomic_fn(tf.constant(\"a\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c3bd1036c18c" + }, + "source": [ + "This has the advantage of having lower Python overhead for high-performance scenarios. But it should only be used for forward inference (no gradient support), and captured tensor values (if any) would need to be explicitly supplied." 
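One way to see the lower Python overhead mentioned above is to time the two call paths, for example with `timeit`. This is a rough sketch that assumes `double_strings` is the concrete function obtained earlier in this guide; the exact numbers will vary by machine.

```python
import timeit
import tensorflow as tf

s = tf.constant("a")

# Calling through the ConcreteFunction (full dispatch and checking logic).
concrete_time = timeit.timeit(lambda: double_strings(s), number=1000)

# Calling the underlying AtomicFunction directly (forward inference only).
atomic_time = timeit.timeit(lambda: double_strings.inference_fn(s), number=1000)

print("ConcreteFunction:", concrete_time, "seconds")
print("AtomicFunction:  ", atomic_time, "seconds")
```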
+ ] + }, { "cell_type": "markdown", "metadata": { @@ -745,7 +900,7 @@ "id": "KxwJ8znPI0Cg" }, "source": [ - "If you're curious you can inspect the code autograph generates." + "If you're curious you can inspect the code AutoGraph generates." ] }, { @@ -884,9 +1039,9 @@ "id": "JeD2U-yrbfVb" }, "source": [ - "When wrapping Python/NumPy data in a Dataset, be mindful of `tf.data.Dataset.from_generator` versus ` tf.data.Dataset.from_tensors`. The former will keep the data in Python and fetch it via `tf.py_function` which can have performance implications, whereas the latter will bundle a copy of the data as one large `tf.constant()` node in the graph, which can have memory implications.\n", + "When wrapping Python/NumPy data in a Dataset, be mindful of `tf.data.Dataset.from_generator` versus ` tf.data.Dataset.from_tensor_slices`. The former will keep the data in Python and fetch it via `tf.py_function` which can have performance implications, whereas the latter will bundle a copy of the data as one large `tf.constant()` node in the graph, which can have memory implications.\n", "\n", - "Reading data from files via `TFRecordDataset`, `CsvDataset`, etc. is the most effective way to consume data, as then TensorFlow itself can manage the asynchronous loading and prefetching of data, without having to involve Python. To learn more, see the [`tf.data`: Build TensorFlow input pipelines](../../guide/data) guide." + "Reading data from files via `TFRecordDataset`, `CsvDataset`, etc. is the most effective way to consume data, as then TensorFlow itself can manage the asynchronous loading and prefetching of data, without having to involve Python. To learn more, see the [`tf.data`: Build TensorFlow input pipelines](data.ipynb) guide." ] }, { @@ -927,7 +1082,7 @@ " state = rnn_step(input_data[i], state)\n", " states = states.write(i, state)\n", " return tf.transpose(states.stack(), [1, 0, 2])\n", - " \n", + "\n", "dynamic_rnn(rnn_step,\n", " tf.random.uniform([batch_size, seq_len, feature_size]),\n", " tf.zeros([batch_size, feature_size]))" @@ -941,7 +1096,7 @@ "source": [ "## Limitations\n", "\n", - "TensorFlow `Function` has a few limitations by design that you should be aware of when converting a Python function to a `Function`." + "`tf.function` has a few limitations by design that you should be aware of when converting a Python function to a `tf.function`." ] }, { @@ -952,7 +1107,7 @@ "source": [ "### Executing Python side effects\n", "\n", - "Side effects, like printing, appending to lists, and mutating globals, can behave unexpectedly inside a `Function`, sometimes executing twice or not all. They only happen the first time you call a `Function` with a set of inputs. Afterwards, the traced `tf.Graph` is reexecuted, without executing the Python code.\n", + "Side effects, like printing, appending to lists, and mutating globals, can behave unexpectedly inside a `tf.function`, sometimes executing twice or not all. They only happen the first time you call a `tf.function` with a set of inputs. Afterwards, the traced `tf.Graph` is reexecuted, without executing the Python code.\n", "\n", "The general rule of thumb is to avoid relying on Python side effects in your logic and only use them to debug your traces. Otherwise, TensorFlow APIs like `tf.data`, `tf.print`, `tf.summary`, `tf.Variable.assign`, and `tf.TensorArray` are the best way to ensure your code will be executed by the TensorFlow runtime with each call." 
] @@ -981,7 +1136,66 @@ "id": "e1I0dPiqTV8H" }, "source": [ - "If you would like to execute Python code during each invocation of a `Function`, `tf.py_function` is an exit hatch. The drawback of `tf.py_function` is that it's not portable or particularly performant, cannot be saved with SavedModel, and does not work well in distributed (multi-GPU, TPU) setups. Also, since `tf.py_function` has to be wired into the graph, it casts all inputs/outputs to tensors." + "If you would like to execute Python code during each invocation of a `tf.function`, `tf.py_function` is an exit hatch. The drawbacks of `tf.py_function` are that it's not portable or particularly performant, cannot be saved with `SavedModel`, and does not work well in distributed (multi-GPU, TPU) setups. Also, since `tf.py_function` has to be wired into the graph, it casts all inputs/outputs to tensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZbI7XA_e6yA2" + }, + "outputs": [], + "source": [ + "@tf.py_function(Tout=tf.float32)\n", + "def py_plus(x, y):\n", + " print('Executing eagerly.')\n", + " return x + y\n", + "\n", + "@tf.function\n", + "def tf_wrapper(x, y):\n", + " print('Tracing.')\n", + " return py_plus(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h5ttN_sI7TdQ" + }, + "source": [ + "The `tf.function` will trace the first time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mAK4XINl7Ldy" + }, + "outputs": [], + "source": [ + "tf_wrapper(tf.constant(1.0), tf.constant(2.0)).numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Atxvrd_o7dSy" + }, + "source": [ + "But the `tf.py_function` inside executes eagerly every time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vv7qTiTU7bjy" + }, + "outputs": [], + "source": [ + "tf_wrapper(tf.constant(1.0), tf.constant(2.0)).numpy()" ] }, { @@ -1017,13 +1231,86 @@ "assert len(external_list) == 1" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "5eZTFRv_k_nR" + }, + "source": [ + "Sometimes unexpected behaviors are very hard to notice. In the example below, the `counter` is intended to safeguard the increment of a variable. However, because it is a Python integer and not a TensorFlow object, its value is captured during the first trace. When the `tf.function` is used, the `assign_add` will be recorded unconditionally in the underlying graph. Therefore, `v` will increase by 1 every time the `tf.function` is called. This issue is common among users who try to migrate their graph-mode TensorFlow code to TensorFlow 2 using `tf.function` decorators, when Python side effects (the `counter` in the example) are used to determine what ops to run (`assign_add` in the example). Usually, users realize this only after seeing suspicious numerical results, or significantly lower performance than expected (e.g. if the guarded operation is very costly)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5r6p7-9jk_3L" + }, + "outputs": [], + "source": [ + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = tf.Variable(0)\n", + " self.counter = 0\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " if self.counter == 0:\n", + " # A python side-effect\n", + " self.counter += 1\n", + " self.v.assign_add(1)\n", + "\n", + " return self.v\n", + "\n", + "m = Model()\n", + "for n in range(3):\n", + " print(m().numpy()) # prints 1, 2, 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tXCTcHoVcxhX" + }, + "source": [ + "A workaround to achieve the expected behavior is using [`tf.init_scope`](https://www.tensorflow.org/api_docs/python/tf/init_scope) to lift the operations outside of the function graph. This ensures that the variable increment is only done once during tracing time. It should be noted `init_scope` has other side effects including cleared control flow and gradient tape. Sometimes the usage of `init_scope` can become too complex to manage realistically." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "An4MrIbrcvi8" + }, + "outputs": [], + "source": [ + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = tf.Variable(0)\n", + " self.counter = 0\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " if self.counter == 0:\n", + " # Lifts ops out of function-building graphs\n", + " with tf.init_scope():\n", + " self.counter += 1\n", + " self.v.assign_add(1)\n", + "\n", + " return self.v\n", + "\n", + "m = Model()\n", + "for n in range(3):\n", + " print(m().numpy()) # prints 1, 1, 1" + ] + }, { "cell_type": "markdown", "metadata": { "id": "pbFG5CX4LwQA" }, "source": [ - "You should avoid mutating containers like lists, dicts, other objects that live outside the `Function`. Instead, use arguments and TF objects. For example, the section [\"Accumulating values in a loop\"](#accumulating_values_in_a_loop) has one example of how list-like operations can be implemented.\n", + "In summary, as a rule of thumb, you should avoid mutating python objects such as integers or containers like lists that live outside the `tf.function`. Instead, use arguments and TF objects. For example, the section [\"Accumulating values in a loop\"](#accumulating_values_in_a_loop) has one example of how list-like operations can be implemented.\n", "\n", "You can, in some cases, capture and manipulate state if it is a [`tf.Variable`](https://www.tensorflow.org/guide/variable). This is how the weights of Keras models are updated with repeated calls to the same `ConcreteFunction`." ] @@ -1094,43 +1381,6 @@ "good_consume_next(iterator)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "FHQ0UeU-vWo8" - }, - "source": [ - "### Deleting tf.Variables between `Function` calls\n", - "\n", - "Another error you may encounter is a garbage-collected variable. `ConcreteFunction`s only retain [WeakRefs](https://docs.python.org/3/library/weakref.html) to the variables they close over, so you must retain a reference to any variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uMiRPfETjpt-" - }, - "outputs": [], - "source": [ - "external_var = tf.Variable(3)\n", - "@tf.function\n", - "def f(x):\n", - " return x * external_var\n", - "\n", - "traced_f = f.get_concrete_function(4)\n", - "print(\"Calling concrete function...\")\n", - "print(traced_f(4))\n", - "\n", - "# The original variable object gets garbage collected, since there are no more\n", - "# references to it.\n", - "external_var = tf.Variable(4)\n", - "print()\n", - "print(\"Calling concrete function after garbage collecting its closed Variable...\")\n", - "with assert_raises(tf.errors.FailedPreconditionError):\n", - " traced_f(4)" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1166,9 +1416,10 @@ "correct_a = leaky_function(tf.constant(1))\n", "\n", "print(correct_a.numpy()) # Good - value obtained from function's returns\n", - "with assert_raises(AttributeError):\n", + "try:\n", " x.numpy() # Bad - tensor leaked from inside the function, cannot be used here\n", - "print(x)" + "except AttributeError as expected:\n", + " print(expected)" ] }, { @@ -1197,9 +1448,10 @@ "correct_a = leaky_function(tf.constant(1))\n", "\n", "print(correct_a.numpy()) # Good - value obtained from function's returns\n", - "with assert_raises(AttributeError):\n", + "try:\n", " x.numpy() # Bad - tensor leaked from inside the function, cannot be used here\n", - "print(x)\n", + "except AttributeError as expected:\n", + " print(expected)\n", "\n", "@tf.function\n", "def captures_leaked_tensor(b):\n", @@ -1244,6 +1496,64 @@ " external_object.field = a # Bad - leaks tensor" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "g-XVQcD-wf5K" + }, + "source": [ + "### Recursive tf.functions are not supported\n", + "\n", + "Recursive `tf.function`s are not supported and could cause infinite loops. For example," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QSN-T1m5EFcR" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def recursive_fn(n):\n", + " if n > 0:\n", + " return recursive_fn(n - 1)\n", + " else:\n", + " return 1\n", + "\n", + "with assert_raises(Exception):\n", + " recursive_fn(tf.constant(5)) # Bad - maximum recursion error." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LyRyooKGUxNV" + }, + "source": [ + "Even if a recursive `tf.function` seems to work, the Python function will be traced multiple times and could have performance implications. For example," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FlmTqfMUwmT" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def recursive_fn(n):\n", + " if n > 0:\n", + " print('tracing')\n", + " return recursive_fn(n - 1)\n", + " else:\n", + " return 1\n", + "\n", + "recursive_fn(5) # Warning - multiple tracings" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1252,7 +1562,7 @@ "source": [ "## Known Issues\n", "\n", - "If your `Function` is not evaluating correctly, the error may be explained by these known issues which are planned to be fixed in the future." + "If your `tf.function` is not evaluating correctly, the error may be explained by these known issues which are planned to be fixed in the future." ] }, { @@ -1263,9 +1573,9 @@ "source": [ "### Depending on Python global and free variables\n", "\n", - "`Function` creates a new `ConcreteFunction` when called with a new value of a Python argument. 
However, it does not do that for the Python closure, globals, or nonlocals of that `Function`. If their value changes in between calls to the `Function`, the `Function` will still use the values they had when it was traced. This is different from how regular Python functions work.\n", + "`tf.function` creates a new `ConcreteFunction` when called with a new value of a Python argument. However, it does not do that for the Python closure, globals, or nonlocals of that `tf.function`. If their value changes in between calls to the `tf.function`, the `tf.function` will still use the values they had when it was traced. This is different from how regular Python functions work.\n", "\n", - "For that reason, we recommend a functional programming style that uses arguments instead of closing over outer names." + "For that reason, you should follow a functional programming style that uses arguments instead of closing over outer names." ] }, { @@ -1306,10 +1616,39 @@ { "cell_type": "markdown", "metadata": { - "id": "Tu0SnPwaL7pI" + "id": "ZoPg5w1Pjqnb" + }, + "source": [ + "Another way to update a global value is to make it a `tf.Variable` and use the `Variable.assign` method instead.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oeJMdXd3M0cc" }, + "outputs": [], "source": [ - "You can close over outer names, as long as you don't update their values.\n" + "@tf.function\n", + "def variable_add():\n", + " return 1 + foo\n", + "\n", + "foo = tf.Variable(1)\n", + "print(\"Variable:\", variable_add())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L3q7sUJWZOSd" + }, + "outputs": [], + "source": [ + "print(\"Updating the value of `foo` to 100!\")\n", + "foo.assign(100)\n", + "print(\"Variable:\", variable_add())" ] }, { @@ -1318,7 +1657,7 @@ "id": "hvwe9gTIWfx6" }, "source": [ - "#### Depending on Python objects" + "### Depending on Python objects" ] }, { @@ -1327,7 +1666,11 @@ "id": "BJkZS-SwPvOQ" }, "source": [ - "The recommendation to pass Python objects as arguments into `tf.function` has a number of known issues, that are expected to be fixed in the future. In general, you can rely on consistent tracing if you use a Python primitive or `tf.nest`-compatible structure as an argument or pass in a *different* instance of an object into a `Function`. However, `Function` will *not* create a new trace when you pass **the same object and only change its attributes**." + "Passing custom Python objects as arguments to `tf.function` is supported but has certain limitations.\n", + "\n", + "For maximum feature coverage, consider transforming the objects into [Extension types](extension_type.ipynb) before passing them to `tf.function`. You can also use Python primitives and `tf.nest`-compatible structures.\n", + "\n", + "However, as covered in the [rules of tracing](#rules_of_tracing), when a custom `TraceType` is not provided by the custom Python class, `tf.function` is forced to use instance-based equality which means it will **not create a new trace** when you pass the **same object with modified attributes**." 
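The limitation can be sketched as follows (the `Config` class and `apply_scale` function are illustrative, not the `SimpleModel` example used later in the guide): because the default `TraceType` for a plain Python object is based on the instance itself, mutating an attribute does not invalidate the existing trace.

```python
import tensorflow as tf

class Config:
  def __init__(self, scale):
    self.scale = scale

@tf.function
def apply_scale(config, x):
  print("Tracing with scale =", config.scale)
  return x * config.scale

cfg = Config(2.0)
x = tf.constant(3.0)

print(apply_scale(cfg, x))           # Traces; returns 6.0.

cfg.scale = 10.0                     # Mutating the same instance...
print(apply_scale(cfg, x))           # ...still returns 6.0: the old trace is reused.

print(apply_scale(Config(10.0), x))  # A new instance forces a retrace: returns 30.0.
```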
] }, { @@ -1372,11 +1715,11 @@ "id": "Ytcgg2qFWaBF" }, "source": [ - "Using the same `Function` to evaluate the updated instance of the model will be buggy since the updated model has the [same cache key](#rules_of_tracing) as the original model.\n", + "Using the same `tf.function` to evaluate the modified instance of the model will be buggy since it still has the [same instance-based TraceType](#rules_of_tracing) as the original model.\n", "\n", - "For that reason, we recommend that you write your `Function` to avoid depending on mutable object attributes or create new objects.\n", + "For that reason, you're recommended to write your `tf.function` to avoid depending on mutable object attributes or implement the [Tracing Protocol](#use_the_tracing_protocol) for the objects to inform `tf.function` about such attributes.\n", "\n", - "If that is not possible, one workaround is to make new `Function`s each time you modify your object to force retracing:" + "If that is not possible, one workaround is to make new `tf.function`s each time you modify your object to force retracing:" ] }, { @@ -1392,8 +1735,8 @@ "\n", "new_model = SimpleModel()\n", "evaluate_no_bias = tf.function(evaluate).get_concrete_function(new_model, x)\n", - "# Don't pass in `new_model`, `Function` already captured its state during tracing.\n", - "print(evaluate_no_bias(x)) " + "# Don't pass in `new_model`. `tf.function` already captured its state during tracing.\n", + "print(evaluate_no_bias(x))" ] }, { @@ -1406,7 +1749,7 @@ "source": [ "print(\"Adding bias!\")\n", "new_model.bias += 5.0\n", - "# Create new Function and ConcreteFunction since you modified new_model.\n", + "# Create new `tf.function` and `ConcreteFunction` since you modified `new_model`.\n", "evaluate_with_bias = tf.function(evaluate).get_concrete_function(new_model, x)\n", "print(evaluate_with_bias(x)) # Don't pass in `new_model`." ] @@ -1463,7 +1806,7 @@ "source": [ "### Creating tf.Variables\n", "\n", - "`Function` only supports singleton `tf.Variable`s created once on the first call, and reused across subsequent function calls. The code snippet below would create a new `tf.Variable` in every function call, which results in a `ValueError` exception.\n", + "`tf.function` only supports singleton `tf.Variable`s created once on the first call, and reused across subsequent function calls. The code snippet below would create a new `tf.Variable` in every function call, which results in a `ValueError` exception.\n", "\n", "Example:" ] @@ -1524,7 +1867,7 @@ }, "source": [ "#### Using with multiple Keras optimizers\n", - "You may encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when using more than one Keras optimizer with a `tf.function`. This error occurs because optimizers internally create `tf.Variables` when they apply gradients for the first time." + "You may encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when using more than one Keras optimizer with a `tf.function`. This error occurs because optimizers internally create `tf.Variable`s when they apply gradients for the first time." 
] }, { @@ -1537,7 +1880,7 @@ "source": [ "opt1 = tf.keras.optimizers.Adam(learning_rate = 1e-2)\n", "opt2 = tf.keras.optimizers.Adam(learning_rate = 1e-3)\n", - " \n", + "\n", "@tf.function\n", "def train_step(w, x, y, optimizer):\n", " with tf.GradientTape() as tape:\n", @@ -1561,7 +1904,46 @@ "id": "7Q8BRPCThTjB" }, "source": [ - "If you need to change the optimizer during training, a workaround is to create a new `Function` for each optimizer, calling the [`ConcreteFunction`](#obtaining_concrete_functions) directly." + "If you need to change a stateful object between calls, it's simplest to define a `tf.Module` subclass, and create instances to hold those objects:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3P59ocmIslHz" + }, + "outputs": [], + "source": [ + "class TrainStep(tf.Module):\n", + " def __init__(self, optimizer):\n", + " self.optimizer = optimizer\n", + "\n", + " @tf.function\n", + " def __call__(self, w, x, y):\n", + " with tf.GradientTape() as tape:\n", + " L = tf.reduce_sum(tf.square(w*x - y))\n", + " gradients = tape.gradient(L, [w])\n", + " self.optimizer.apply_gradients(zip(gradients, [w]))\n", + "\n", + "\n", + "opt1 = tf.keras.optimizers.Adam(learning_rate = 1e-2)\n", + "opt2 = tf.keras.optimizers.Adam(learning_rate = 1e-3)\n", + "\n", + "train_o1 = TrainStep(opt1)\n", + "train_o2 = TrainStep(opt2)\n", + "\n", + "train_o1(w, x, y)\n", + "train_o2(w, x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dUHUi881smHF" + }, + "source": [ + "You could also do this manually by creating multiple instances of the `@tf.function` wrapper, one for each optimizer:" ] }, { @@ -1586,14 +1968,14 @@ "x = tf.constant([-1.])\n", "y = tf.constant([2.])\n", "\n", - "# Make a new Function and ConcreteFunction for each optimizer.\n", - "train_step_1 = tf.function(train_step).get_concrete_function(w, x, y, opt1)\n", - "train_step_2 = tf.function(train_step).get_concrete_function(w, x, y, opt2)\n", + "# Make a new tf.function and ConcreteFunction for each optimizer.\n", + "train_step_1 = tf.function(train_step)\n", + "train_step_2 = tf.function(train_step)\n", "for i in range(10):\n", " if i % 2 == 0:\n", - " train_step_1(w, x, y) # `opt1` is not used as a parameter. \n", + " train_step_1(w, x, y, opt1)\n", " else:\n", - " train_step_2(w, x, y) # `opt2` is not used as a parameter." + " train_step_2(w, x, y, opt2)" ] }, { @@ -1604,9 +1986,9 @@ "source": [ "#### Using with multiple Keras models\n", "\n", - "You may also encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when passing different model instances to the same `Function`.\n", + "You may also encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when passing different model instances to the same `tf.function`.\n", "\n", - "This error occurs because Keras models (which [do not have their input shape defined](https://www.tensorflow.org/guide/keras/custom_layers_and_models#best_practice_deferring_weight_creation_until_the_shape_of_the_inputs_is_known)) and Keras layers create `tf.Variables`s when they are first called. You may be attempting to initialize those variables inside a `Function`, which has already been called. 
To avoid this error, try calling `model.build(input_shape)` to initialize all the weights before training the model.\n" + "This error occurs because Keras models (which [do not have their input shape defined](https://www.tensorflow.org/guide/keras/custom_layers_and_models#best_practice_deferring_weight_creation_until_the_shape_of_the_inputs_is_known)) and Keras layers create `tf.Variable`s when they are first called. You may be attempting to initialize those variables inside a `tf.function`, which has already been called. To avoid this error, try calling `model.build(input_shape)` to initialize all the weights before training the model.\n" ] }, { @@ -1617,14 +1999,14 @@ "source": [ "## Further reading\n", "\n", - "To learn about how to export and load a `Function`, see the [SavedModel guide](../../guide/saved_model). To learn more about graph optimizations that are performed after tracing, see the [Grappler guide](../../guide/graph_optimization). To learn how to optimize your data pipeline and profile your model, see the [Profiler guide](../../guide/profiler.md)." + "To learn about how to export and load a `tf.function`, see the [SavedModel guide](../../guide/saved_model). To learn more about graph optimizations that are performed after tracing, see the [Grappler guide](../../guide/graph_optimization). To learn how to optimize your data pipeline and profile your model, see the [Profiler guide](../../guide/profiler.md)." ] } ], "metadata": { "colab": { - "collapsed_sections": [], "name": "function.ipynb", + "private_outputs": true, "provenance": [], "toc_visible": true }, diff --git a/site/en/guide/gpu_performance_analysis.md b/site/en/guide/gpu_performance_analysis.md index ecbb03ad3ad..2832686d8f1 100644 --- a/site/en/guide/gpu_performance_analysis.md +++ b/site/en/guide/gpu_performance_analysis.md @@ -169,8 +169,8 @@ the trace viewer, you should look at the model code between steps and check if disabling callbacks/metrics improves performance. Some details of these ops are also on the trace viewer (both device and host side).The recommendation in this scenario is to amortize the overhead of these ops by executing them after a -fixed number of steps instead of every step. When using the `compile` method in -the `tf.keras` API, setting the `experimental_steps_per_execution` flag does +fixed number of steps instead of every step. When using the `Model.compile` method in +the `tf.keras` API, setting the `steps_per_execution` flag does this automatically. For custom training loops, use `tf.while_loop`. #### 2. Achieve higher device utilization diff --git a/site/en/guide/graph_optimization.ipynb b/site/en/guide/graph_optimization.ipynb index 50eedda621c..063d8817489 100644 --- a/site/en/guide/graph_optimization.ipynb +++ b/site/en/guide/graph_optimization.ipynb @@ -90,7 +90,7 @@ "* *Constant folding optimizer -* Statically infers the value of tensors when possible by folding constant nodes in the graph and materializes the result using constants.\n", "* *Arithmetic optimizer -* Simplifies arithmetic operations by eliminating common subexpressions and simplifying arithmetic statements. 
\n", "* *Layout optimizer -* Optimizes tensor layouts to execute data format dependent operations such as convolutions more efficiently.\n", - "* *Remapper optimizer -* Remaps subgraphs onto more efficient implementations by replacing commonly occuring subgraphs with optimized fused monolithic kernels.\n", + "* *Remapper optimizer -* Remaps subgraphs onto more efficient implementations by replacing commonly occurring subgraphs with optimized fused monolithic kernels.\n", "* *Memory optimizer -* Analyzes the graph to inspect the peak memory usage for each operation and inserts CPU-GPU memory copy operations for swapping GPU memory to CPU to reduce the peak memory usage.\n", "* *Dependency optimizer -* Removes or rearranges control dependencies to shorten the critical path for a model step or enables other\n", "optimizations. Also removes nodes that are effectively no-ops such as Identity.\n", @@ -101,7 +101,7 @@ "* *Loop optimizer -* Optimizes the graph control flow by hoisting loop-invariant subgraphs out of loops and by removing redundant stack operations in loops. Also optimizes loops with statically known trip counts and removes statically known dead branches in conditionals.\n", "* *Scoped allocator optimizer -* Introduces scoped allocators to reduce data movement and to consolidate some operations.\n", "* *Pin to host optimizer -* Swaps small operations onto the CPU. This optimizer is turned OFF by default. \n", - "* *Auto mixed precision optimizer -* Converts data types to float16 where applicable to improve performance. Currently applies only to GPUs.\n", + "* *Auto mixed precision optimizer -* Converts data types to float16 where applicable to improve performance. Currently applies to GPUs and the latest Intel Xeon CPUs.\n", "* *Debug stripper -* Strips nodes related to debugging operations such as `tf.debugging.Assert`, `tf.debugging.check_numerics`, and `tf.print` from the graph. This optimizer is turned OFF by default." ] }, @@ -166,7 +166,7 @@ "source": [ "## Compare execution performance with and without Grappler\n", "\n", - "TensorFlow 2 and beyond executes [eagerly](../eager.md) by default. Use `tf.function` to switch the default execution to Graph mode. Grappler runs automatically in the background to apply the graph optimizations above and improve execution performance. \n" + "TensorFlow 2 and beyond executes eagerly by default. Use `tf.function` to switch the default execution to Graph mode. Grappler runs automatically in the background to apply the graph optimizations above and improve execution performance. 
\n" ] }, { diff --git a/site/en/guide/images/new_type_promotion/type_promotion_lattice.png b/site/en/guide/images/new_type_promotion/type_promotion_lattice.png new file mode 100644 index 00000000000..501698965a2 Binary files /dev/null and b/site/en/guide/images/new_type_promotion/type_promotion_lattice.png differ diff --git a/site/en/guide/images/new_type_promotion/type_promotion_table.png b/site/en/guide/images/new_type_promotion/type_promotion_table.png new file mode 100644 index 00000000000..62bb465212a Binary files /dev/null and b/site/en/guide/images/new_type_promotion/type_promotion_table.png differ diff --git a/site/en/guide/images/tensor/shape2.png b/site/en/guide/images/tensor/shape2.png index 3609ff2c263..a316359c8fc 100644 Binary files a/site/en/guide/images/tensor/shape2.png and b/site/en/guide/images/tensor/shape2.png differ diff --git a/site/en/guide/intro_to_graphs.ipynb b/site/en/guide/intro_to_graphs.ipynb index 38cfcb967d8..4fe442632ba 100644 --- a/site/en/guide/intro_to_graphs.ipynb +++ b/site/en/guide/intro_to_graphs.ipynb @@ -70,13 +70,13 @@ "source": [ "## Overview\n", "\n", - "This guide goes beneath the surface of TensorFlow and Keras to demonstrate how TensorFlow works. If you instead want to immediately get started with Keras, check out the [collection of Keras guides](keras/).\n", + "This guide goes beneath the surface of TensorFlow and Keras to demonstrate how TensorFlow works. If you instead want to immediately get started with Keras, check out the [collection of Keras guides](https://www.tensorflow.org/guide/keras/).\n", "\n", "In this guide, you'll learn how TensorFlow allows you to make simple changes to your code to get graphs, how graphs are stored and represented, and how you can use them to accelerate your models.\n", "\n", "Note: For those of you who are only familiar with TensorFlow 1.x, this guide demonstrates a very different view of graphs.\n", "\n", - "**This is a big-picture overview that covers how `tf.function` allows you to switch from eager execution to graph execution.** For a more complete specification of `tf.function`, go to the [`tf.function` guide](function).\n" + "**This is a big-picture overview that covers how `tf.function` allows you to switch from eager execution to graph execution.** For a more complete specification of `tf.function`, go to the [Better performance with `tf.function`](./function.ipynb) guide.\n" ] }, { @@ -87,13 +87,13 @@ "source": [ "### What are graphs?\n", "\n", - "In the previous three guides, you ran TensorFlow **eagerly**. This means TensorFlow operations are executed by Python, operation by operation, and returning results back to Python.\n", + "In the previous three guides, you ran TensorFlow **eagerly**. This means TensorFlow operations are executed by Python, operation by operation, and return results back to Python.\n", "\n", "While eager execution has several unique advantages, graph execution enables portability outside Python and tends to offer better performance. **Graph execution** means that tensor computations are executed as a *TensorFlow graph*, sometimes referred to as a `tf.Graph` or simply a \"graph.\"\n", "\n", "**Graphs are data structures that contain a set of `tf.Operation` objects, which represent units of computation; and `tf.Tensor` objects, which represent the units of data that flow between operations.** They are defined in a `tf.Graph` context. 
Since these graphs are data structures, they can be saved, run, and restored all without the original Python code.\n", "\n", - "This is what a TensorFlow graph representing a two-layer neural network looks like when visualized in TensorBoard.\n" + "This is what a TensorFlow graph representing a two-layer neural network looks like when visualized in TensorBoard:" ] }, { @@ -113,7 +113,7 @@ "source": [ "### The benefits of graphs\n", "\n", - "With a graph, you have a great deal of flexibility. You can use your TensorFlow graph in environments that don't have a Python interpreter, like mobile applications, embedded devices, and backend servers. TensorFlow uses graphs as the format for [saved models](saved_model) when it exports them from Python.\n", + "With a graph, you have a great deal of flexibility. You can use your TensorFlow graph in environments that don't have a Python interpreter, like mobile applications, embedded devices, and backend servers. TensorFlow uses graphs as the format for [saved models](./saved_model.ipynb) when it exports them from Python.\n", "\n", "Graphs are also easily optimized, allowing the compiler to do transformations like:\n", "\n", @@ -144,6 +144,15 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "0d1689fa928f" + }, + "source": [ + "Import some necessary libraries:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -165,7 +174,7 @@ "source": [ "## Taking advantage of graphs\n", "\n", - "You create and run a graph in TensorFlow by using `tf.function`, either as a direct call or as a decorator. `tf.function` takes a regular function as input and returns a `Function`. **A `Function` is a Python callable that builds TensorFlow graphs from the Python function. You use a `Function` in the same way as its Python equivalent.**\n" + "You create and run a graph in TensorFlow by using `tf.function`, either as a direct call or as a decorator. `tf.function` takes a regular function as input and returns a `tf.types.experimental.PolymorphicFunction`. **A `PolymorphicFunction` is a Python callable that builds TensorFlow graphs from the Python function. You use a `tf.function` in the same way as its Python equivalent.**\n" ] }, { @@ -182,7 +191,8 @@ " x = x + b\n", " return x\n", "\n", - "# `a_function_that_uses_a_graph` is a TensorFlow `Function`.\n", + "# The Python type of `a_function_that_uses_a_graph` will now be a\n", + "# `PolymorphicFunction`.\n", "a_function_that_uses_a_graph = tf.function(a_regular_function)\n", "\n", "# Make some tensors.\n", @@ -191,7 +201,7 @@ "b1 = tf.constant(4.0)\n", "\n", "orig_value = a_regular_function(x1, y1, b1).numpy()\n", - "# Call a `Function` like a Python function.\n", + "# Call a `tf.function` like a Python function.\n", "tf_function_value = a_function_that_uses_a_graph(x1, y1, b1).numpy()\n", "assert(orig_value == tf_function_value)" ] @@ -202,7 +212,7 @@ "id": "PNvuAYpdrTOf" }, "source": [ - "On the outside, a `Function` looks like a regular function you write using TensorFlow operations. [Underneath](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/def_function.py), however, it is *very different*. A `Function` **encapsulates [several `tf.Graph`s behind one API](#polymorphism_one_function_many_graphs).** That is how `Function` is able to give you the [benefits of graph execution](#the_benefits_of_graphs), like speed and deployability." + "On the outside, a `tf.function` looks like a regular function you write using TensorFlow operations. 
[Underneath](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/polymorphic_function/polymorphic_function.py), however, it is *very different*. The underlying `PolymorphicFunction` **encapsulates several `tf.Graph`s behind one API** (learn more in the _Polymorphism_ section). That is how a `tf.function` is able to give you the benefits of graph execution, like speed and deployability (refer to _The benefits of graphs_ above)." ] }, { @@ -227,7 +237,8 @@ " x = x + b\n", " return x\n", "\n", - "# Use the decorator to make `outer_function` a `Function`.\n", + "# Using the `tf.function` decorator makes `outer_function` into a\n", + "# `PolymorphicFunction`.\n", "@tf.function\n", "def outer_function(x):\n", " y = tf.constant([[2.0], [3.0]])\n", @@ -274,7 +285,8 @@ " else:\n", " return 0\n", "\n", - "# `tf_simple_relu` is a TensorFlow `Function` that wraps `simple_relu`.\n", + "# Using `tf.function` makes `tf_simple_relu` a `PolymorphicFunction` that wraps\n", + "# `simple_relu`.\n", "tf_simple_relu = tf.function(simple_relu)\n", "\n", "print(\"First branch, with graph:\", tf_simple_relu(tf.constant(1)).numpy())\n", @@ -320,7 +332,7 @@ "id": "GZ4Ieg6tBE6l" }, "source": [ - "Most of the time, `tf.function` will work without special considerations. However, there are some caveats, and the [tf.function guide](./function.ipynb) can help here, as well as the [complete AutoGraph reference](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/index.md)" + "Most of the time, `tf.function` will work without special considerations. However, there are some caveats, and the [`tf.function` guide](./function.ipynb) can help here, as well as the [complete AutoGraph reference](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/index.md)." ] }, { @@ -329,13 +341,13 @@ "id": "sIpc_jfjEZEg" }, "source": [ - "### Polymorphism: one `Function`, many graphs\n", + "### Polymorphism: one `tf.function`, many graphs\n", "\n", - "A `tf.Graph` is specialized to a specific type of inputs (for example, tensors with a specific [`dtype`](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) or objects with the same [`id()`](https://docs.python.org/3/library/functions.html#id])).\n", + "A `tf.Graph` is specialized to a specific type of inputs (for example, tensors with a specific [`dtype`](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) or objects with the same [`id()`](https://docs.python.org/3/library/functions.html#id)).\n", "\n", - "Each time you invoke a `Function` with new `dtypes` and shapes in its arguments, `Function` creates a new `tf.Graph` for the new arguments. The `dtypes` and shapes of a `tf.Graph`'s inputs are known as an **input signature** or just a **signature**.\n", + "Each time you invoke a `tf.function` with a set of arguments that can't be handled by any of its existing graphs (such as arguments with new `dtypes` or incompatible shapes), it creates a new `tf.Graph` specialized to those new arguments. The type specification of a `tf.Graph`'s inputs is represented by `tf.types.experimental.FunctionType`, also referred to as the **signature**. 
For more information regarding when a new `tf.Graph` is generated, how that can be controlled, and how `FunctionType` can be useful, go to the _Rules of tracing_ section of the [Better performance with `tf.function`](./function.ipynb) guide.\n", "\n", - "The `Function` stores the `tf.Graph` corresponding to that signature in a `ConcreteFunction`. **A `ConcreteFunction` is a wrapper around a `tf.Graph`.**\n" + "The `tf.function` stores the `tf.Graph` corresponding to that signature in a `ConcreteFunction`. **A `ConcreteFunction` can be thought of as a wrapper around a `tf.Graph`.**\n" ] }, { @@ -350,7 +362,7 @@ "def my_relu(x):\n", " return tf.maximum(0., x)\n", "\n", - "# `my_relu` creates new graphs as it observes more signatures.\n", + "# `my_relu` creates new graphs as it observes different input types.\n", "print(my_relu(tf.constant(5.5)))\n", "print(my_relu([1, -1]))\n", "print(my_relu(tf.constant([3., -3.])))" @@ -362,7 +374,7 @@ "id": "1qRtw7R4KL9X" }, "source": [ - "If the `Function` has already been called with that signature, `Function` does not create a new `tf.Graph`." + "If the `tf.function` has already been called with the same input types, it does not create a new `tf.Graph`." ] }, { @@ -374,8 +386,8 @@ "outputs": [], "source": [ "# These two calls do *not* create new graphs.\n", - "print(my_relu(tf.constant(-2.5))) # Signature matches `tf.constant(5.5)`.\n", - "print(my_relu(tf.constant([-1., 1.]))) # Signature matches `tf.constant([3., -3.])`." + "print(my_relu(tf.constant(-2.5))) # Input type matches `tf.constant(5.5)`.\n", + "print(my_relu(tf.constant([-1., 1.]))) # Input type matches `tf.constant([3., -3.])`." ] }, { @@ -384,7 +396,7 @@ "id": "UohRmexhIpvQ" }, "source": [ - "Because it's backed by multiple graphs, a `Function` is **polymorphic**. That enables it to support more input types than a single `tf.Graph` could represent, as well as to optimize each `tf.Graph` for better performance." + "Because it's backed by multiple graphs, a `tf.function` is (as the name \"PolymorphicFunction\" suggests) **polymorphic**. That enables it to support more input types than a single `tf.Graph` could represent, and to optimize each `tf.Graph` for better performance." ] }, { @@ -419,7 +431,7 @@ "source": [ "### Graph execution vs. eager execution\n", "\n", - "The code in a `Function` can be executed both eagerly and as a graph. By default, `Function` executes its code as a graph:\n" + "The code in a `tf.function` can be executed both eagerly and as a graph. By default, `tf.function` executes its code as a graph:\n" ] }, { @@ -467,7 +479,7 @@ "id": "cyZNCRcQorGO" }, "source": [ - "To verify that your `Function`'s graph is doing the same computation as its equivalent Python function, you can make it execute eagerly with `tf.config.run_functions_eagerly(True)`. This is a switch that **turns off `Function`'s ability to create and run graphs**, instead executing the code normally." + "To verify that your `tf.function`'s graph is doing the same computation as its equivalent Python function, you can make it execute eagerly with `tf.config.run_functions_eagerly(True)`. This is a switch that **turns off `tf.function`'s ability to create and run graphs**, instead of executing the code normally." ] }, { @@ -510,7 +522,7 @@ "id": "DKT3YBsqy0x4" }, "source": [ - "However, `Function` can behave differently under graph and eager execution. The Python [`print`](https://docs.python.org/3/library/functions.html#print) function is one example of how these two modes differ. 
Let's check out what happens when you insert a `print` statement to your function and call it repeatedly.\n" + "However, `tf.function` can behave differently under graph and eager execution. The Python [`print`](https://docs.python.org/3/library/functions.html#print) function is one example of how these two modes differ. Let's check out what happens when you insert a `print` statement to your function and call it repeatedly." ] }, { @@ -558,7 +570,7 @@ "source": [ "Is the output surprising? **`get_MSE` only printed once even though it was called *three* times.**\n", "\n", - "To explain, the `print` statement is executed when `Function` runs the original code in order to create the graph in a process known as [\"tracing\"](function.ipynb#tracing). **Tracing captures the TensorFlow operations into a graph, and `print` is not captured in the graph.** That graph is then executed for all three calls **without ever running the Python code again**.\n", + "To explain, the `print` statement is executed when `tf.function` runs the original code in order to create the graph in a process known as \"tracing\" (refer to the _Tracing_ section of the [`tf.function` guide](./function.ipynb). **Tracing captures the TensorFlow operations into a graph, and `print` is not captured in the graph.** That graph is then executed for all three calls **without ever running the Python code again**.\n", "\n", "As a sanity check, let's turn off graph execution to compare:" ] @@ -606,7 +618,7 @@ "id": "PUR7qC_bquCn" }, "source": [ - "`print` is a *Python side effect*, and there are [other differences](function#limitations) that you should be aware of when converting a function into a `Function`." + "`print` is a *Python side effect*, and there are other differences that you should be aware of when converting a function into a `tf.function`. Learn more in the _Limitations_ section of the [Better performance with `tf.function`](./function.ipynb) guide." ] }, { @@ -628,7 +640,7 @@ "\n", "\n", "\n", - "Graph execution only executes the operations necessary to produce the observable effects, which includes:\n", + "Graph execution only executes the operations necessary to produce the observable effects, which include:\n", "\n", "- The return value of the function\n", "- Documented well-known side-effects such as:\n", @@ -676,7 +688,7 @@ " tf.gather(x, [1]) # unused\n", " return x\n", "\n", - "# Only needed operations are run during graph exection. The error is not raised.\n", + "# Only needed operations are run during graph execution. The error is not raised.\n", "print(unused_return_graph(tf.constant([0.0])))" ] }, @@ -686,16 +698,16 @@ "id": "def6MupG9R0O" }, "source": [ - "###`tf.function` best practices\n", + "### `tf.function` best practices\n", "\n", - "It may take some time to get used to the behavior of `Function`. To get started quickly, first-time users should play around with decorating toy functions with `@tf.function` to get experience with going from eager to graph execution.\n", + "It may take some time to get used to the behavior of `tf.function`. To get started quickly, first-time users should play around with decorating toy functions with `@tf.function` to get experience with going from eager to graph execution.\n", "\n", "*Designing for `tf.function`* may be your best bet for writing graph-compatible TensorFlow programs. 
Here are some tips:\n", "- Toggle between eager and graph execution early and often with `tf.config.run_functions_eagerly` to pinpoint if/ when the two modes diverge.\n", "- Create `tf.Variable`s\n", - "outside the Python function and modify them on the inside. The same goes for objects that use `tf.Variable`, like `keras.layers`, `keras.Model`s and `tf.optimizers`.\n", - "- Avoid writing functions that [depend on outer Python variables](function#depending_on_python_global_and_free_variables), excluding `tf.Variable`s and Keras objects.\n", - "- Prefer to write functions which take tensors and other TensorFlow types as input. You can pass in other object types but [be careful](function#depending_on_python_objects)!\n", + "outside the Python function and modify them on the inside. The same goes for objects that use `tf.Variable`, like `tf.keras.layers`, `tf.keras.Model`s and `tf.keras.optimizers`.\n", + "- Avoid writing functions that depend on outer Python variables, excluding `tf.Variable`s and Keras objects. Learn more in _Depending on Python global and free variables_ of the [`tf.function` guide](./function.ipynb).\n", + "- Prefer to write functions which take tensors and other TensorFlow types as input. You can pass in other object types but be careful! Learn more in _Depending on Python objects_ of the [`tf.function` guide](./function.ipynb).\n", "- Include as much computation as possible under a `tf.function` to maximize the performance gain. For example, decorate a whole training step or the entire training loop.\n" ] }, @@ -742,7 +754,7 @@ }, "outputs": [], "source": [ - "print(\"Eager execution:\", timeit.timeit(lambda: power(x, 100), number=1000))" + "print(\"Eager execution:\", timeit.timeit(lambda: power(x, 100), number=1000), \"seconds\")" ] }, { @@ -754,7 +766,7 @@ "outputs": [], "source": [ "power_as_graph = tf.function(power)\n", - "print(\"Graph execution:\", timeit.timeit(lambda: power_as_graph(x, 100), number=1000))" + "print(\"Graph execution:\", timeit.timeit(lambda: power_as_graph(x, 100), number=1000), \"seconds\")" ] }, { @@ -763,9 +775,9 @@ "id": "Q1Pfo5YwwILi" }, "source": [ - "`tf.function` is commonly used to speed up training loops, and you can learn more about it in [Writing a training loop from scratch](keras/writing_a_training_loop_from_scratch#speeding-up_your_training_step_with_tffunction) with Keras.\n", + "`tf.function` is commonly used to speed up training loops, and you can learn more about it in the _Speeding-up your training step with `tf.function`_ section of the [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) with Keras guide.\n", "\n", - "Note: You can also try [`tf.function(jit_compile=True)`](https://www.tensorflow.org/xla#explicit_compilation_with_tffunctionjit_compiletrue) for a more significant performance boost, especially if your code is heavy on TF control flow and uses many small tensors." + "Note: You can also try `tf.function(jit_compile=True)` for a more significant performance boost, especially if your code is heavy on TensorFlow control flow and uses many small tensors. Learn more in the _Explicit compilation with `tf.function(jit_compile=True)`_ section of the [XLA overview](https://www.tensorflow.org/xla)." ] }, { @@ -778,7 +790,7 @@ "\n", "Graphs can speed up your code, but the process of creating them has some overhead. For some functions, the creation of the graph takes more time than the execution of the graph. 
**This investment is usually quickly paid back with the performance boost of subsequent executions, but it's important to be aware that the first few steps of any large model training can be slower due to tracing.**\n", "\n", - "No matter how large your model, you want to avoid tracing frequently. The `tf.function` guide discusses [how to set input specifications and use tensor arguments](function#controlling_retracing) to avoid retracing. If you find you are getting unusually poor performance, it's a good idea to check if you are retracing accidentally." + "No matter how large your model, you want to avoid tracing frequently. In the _Controlling retracing_ section, the [`tf.function` guide](./function.ipynb) discusses how to set input specifications and use tensor arguments to avoid retracing. If you find you are getting unusually poor performance, it's a good idea to check if you are retracing accidentally." ] }, { @@ -787,9 +799,9 @@ "id": "F4InDaTjwmBA" }, "source": [ - "## When is a `Function` tracing?\n", + "## When is a `tf.function` tracing?\n", "\n", - "To figure out when your `Function` is tracing, add a `print` statement to its code. As a rule of thumb, `Function` will execute the `print` statement every time it traces." + "To figure out when your `tf.function` is tracing, add a `print` statement to its code. As a rule of thumb, `tf.function` will execute the `print` statement every time it traces." ] }, { @@ -843,13 +855,12 @@ "source": [ "## Next steps\n", "\n", - "You can learn more about `tf.function` on the API reference page and by following the [Better performance with `tf.function`](function) guide." + "You can learn more about `tf.function` on the API reference page and by following the [Better performance with `tf.function`](./function.ipynb) guide." ] } ], "metadata": { "colab": { - "collapsed_sections": [], "name": "intro_to_graphs.ipynb", "toc_visible": true }, diff --git a/site/en/guide/intro_to_modules.ipynb b/site/en/guide/intro_to_modules.ipynb index 93b677b6e7e..79bbe89ca56 100644 --- a/site/en/guide/intro_to_modules.ipynb +++ b/site/en/guide/intro_to_modules.ipynb @@ -91,6 +91,7 @@ "outputs": [], "source": [ "import tensorflow as tf\n", + "import keras\n", "from datetime import datetime\n", "\n", "%load_ext tensorboard" @@ -102,10 +103,12 @@ "id": "yt5HEbsYAbw1" }, "source": [ - "## Defining models and layers in TensorFlow\n", + "## TensorFlow Modules\n", "\n", "Most models are made of layers. Layers are functions with a known mathematical structure that can be reused and have trainable variables. In TensorFlow, most high-level implementations of layers and models, such as Keras or [Sonnet](https://github.com/deepmind/sonnet), are built on the same foundational class: `tf.Module`.\n", "\n", + "### Building Modules\n", + "\n", "Here's an example of a very simple `tf.Module` that operates on a scalar tensor:\n" ] }, @@ -337,7 +340,7 @@ "id": "JOLVVBT8J_dl" }, "source": [ - "## Saving weights\n", + "### Saving weights\n", "\n", "You can save a `tf.Module` as both a [checkpoint](./checkpoint.ipynb) and a [SavedModel](./saved_model.ipynb).\n", "\n", @@ -403,7 +406,7 @@ "id": "4eGaNiQWcK4j" }, "source": [ - "During distributed (multi-machine) training they can be sharded, which is why they are numbered (e.g., '00000-of-00001'). In this case, though, there is only have one shard.\n", + "During distributed (multi-machine) training they can be sharded, which is why they are numbered (e.g., '00000-of-00001'). 
In this case, though, there is only one shard.\n", "\n", "When you load models back in, you overwrite the values in your Python object." ] @@ -439,7 +442,7 @@ "id": "pSZebVuWxDXu" }, "source": [ - "## Saving functions\n", + "### Saving functions\n", "\n", "TensorFlow can run models without the original Python objects, as demonstrated by [TensorFlow Serving](https://tensorflow.org/tfx) and [TensorFlow Lite](https://tensorflow.org/lite), even when you download a trained model from [TensorFlow Hub](https://tensorflow.org/hub).\n", "\n", @@ -696,7 +699,26 @@ "\n", "Note that up until this point, there is no mention of Keras. You can build your own high-level API on top of `tf.Module`, and people have. \n", "\n", - "In this section, you will examine how Keras uses `tf.Module`. A complete user guide to Keras models can be found in the [Keras guide](keras/sequential_model.ipynb).\n" + "In this section, you will examine how Keras uses `tf.Module`. A complete user guide to Keras models can be found in the [Keras guide](https://www.tensorflow.org/guide/keras/sequential_model).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ds08u3touwe4t" + }, + "source": [ + "Keras layers and models have a lot more extra features including:\n", + "\n", + "* Optional losses\n", + "* Support for [metrics](https://keras.io/api/layers/base_layer/#add_metric-method)\n", + "* Built-in support for an optional `training` argument to differentiate between training and inference use\n", + "* Saving and restoring python objects instead of just black-box functions\n", + "* `get_config` and `from_config` methods that allow you to accurately store configurations to allow model cloning in Python\n", + "\n", + "These features allow for far more complex models through subclassing, such as a custom GAN or a Variational AutoEncoder (VAE) model. Read about them in the [full guide](./keras/custom_layers_and_models.ipynb) to custom layers and models.\n", + "\n", + "Keras models also come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines." ] }, { @@ -874,22 +896,6 @@ " print(\"Failed:\", e)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "YnporXiudF1I" - }, - "source": [ - "Keras layers have a lot more extra features including:\n", - "\n", - "* Optional losses\n", - "* Support for metrics\n", - "* Built-in support for an optional `training` argument to differentiate between training and inference use\n", - "* `get_config` and `from_config` methods that allow you to accurately store configurations to allow model cloning in Python\n", - "\n", - "Read about them in the [full guide](./keras/custom_layers_and_models.ipynb) to custom layers and models." - ] - }, { "cell_type": "markdown", "metadata": { @@ -900,7 +906,7 @@ "\n", "You can define your model as nested Keras layers.\n", "\n", - "However, Keras also provides a full-featured model class called `tf.keras.Model`. It inherits from `tf.keras.layers.Layer`, so a Keras model can be used, nested, and saved in the same way as Keras layers. Keras models come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines.\n", + "However, Keras also provides a full-featured model class called `tf.keras.Model`. It inherits from `tf.keras.layers.Layer`, so a Keras model can be used and nested in the same way as Keras layers. 
Keras models come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines.\n", "\n", "You can define the `SequentialModule` from above with nearly identical code, again converting `__call__` to `call()` and changing the parent:" ] @@ -913,6 +919,7 @@ }, "outputs": [], "source": [ + "@keras.saving.register_keras_serializable()\n", "class MySequentialModel(tf.keras.Model):\n", " def __init__(self, name=None, **kwargs):\n", " super().__init__(**kwargs)\n", @@ -938,7 +945,7 @@ "source": [ "All the same features are available, including tracking variables and submodules.\n", "\n", - "Note: To emphasize the note above, a raw `tf.Module` nested inside a Keras layer or model will not get its variables collected for training or saving. Instead, nest Keras layers inside of Keras layers." + "Note: A raw `tf.Module` nested inside a Keras layer or model will not get its variables collected for training or saving. Instead, nest Keras layers inside of Keras layers." ] }, { @@ -1022,11 +1029,9 @@ "id": "qI9aXLnaHEFF" }, "source": [ - "## Saving Keras models\n", - "\n", - "Keras models can be checkpointed, and that will look the same as `tf.Module`.\n", + "### Saving Keras models\n", "\n", - "Keras models can also be saved with `tf.saved_model.save()`, as they are modules. However, Keras models have convenience methods and other functionality:" + "Keras models have their own specialized zip archive saving format, marked by the `.keras` extension. When calling `tf.keras.Model.save`, add a `.keras` extension to the filename. For example:" ] }, { @@ -1037,7 +1042,7 @@ }, "outputs": [], "source": [ - "my_sequential_model.save(\"exname_of_file\")" + "my_sequential_model.save(\"exname_of_file.keras\")" ] }, { @@ -1057,7 +1062,7 @@ }, "outputs": [], "source": [ - "reconstructed_model = tf.keras.models.load_model(\"exname_of_file\")" + "reconstructed_model = tf.keras.models.load_model(\"exname_of_file.keras\")" ] }, { @@ -1066,7 +1071,7 @@ "id": "EA7P_MNvpviZ" }, "source": [ - "Keras `SavedModels` also save metric, loss, and optimizer states.\n", + "Keras zip archives — `.keras` files — also save metric, loss, and optimizer states.\n", "\n", "This reconstructed model can be used and will produce the same result when called on the same data:" ] @@ -1082,13 +1087,24 @@ "reconstructed_model(tf.constant([[2.0, 2.0, 2.0]]))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "seLIUG2354s" + }, + "source": [ + "### Checkpointing Keras models\n", + "\n", + "Keras models can also be checkpointed, and that will look the same as `tf.Module`." + ] + }, { "cell_type": "markdown", "metadata": { "id": "xKyjlkceqjwD" }, "source": [ - "There is more to know about saving and serialization of Keras models, including providing configuration methods for custom layers for feature support. Check out the [guide to saving and serialization](keras/save_and_serialize)." + "There is more to know about saving and serialization of Keras models, including providing configuration methods for custom layers for feature support. Check out the [guide to saving and serialization](https://www.tensorflow.org/guide/keras/save_and_serialize)." ] }, { diff --git a/site/en/guide/jax2tf.ipynb b/site/en/guide/jax2tf.ipynb new file mode 100644 index 00000000000..613c622658d --- /dev/null +++ b/site/en/guide/jax2tf.ipynb @@ -0,0 +1,851 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ckM5wJMsNTYL" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "NKvERjPVNWxu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bqePLdDjNhNk" + }, + "source": [ + "# Import a JAX model using JAX2TF" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gw3w46yhNiK_" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyrsY3uTOmPY" + }, + "source": [ + "This notebook provides a complete, runnable example of creating a model using [JAX](https://jax.readthedocs.io/en/latest/) and bringing it into TensorFlow to continue training. This is made possible by [JAX2TF](https://github.com/google/jax/tree/main/jax/experimental/jax2tf), a lightweight API that provides a pathway from the JAX ecosystem to the TensorFlow ecosystem. \n", + "\n", + "JAX is a high-performance array computing library. To create the model, this notebook uses [Flax](https://flax.readthedocs.io/en/latest/), a neural network library for JAX. To train it, it uses [Optax](https://optax.readthedocs.io), an optimization library for JAX.\n", + "\n", + "If you're a researcher using JAX, JAX2TF gives you a path to production using TensorFlow's proven tools.\n", + "\n", + "There are many ways this can be useful, here are just a few:\n", + "\n", + "* Inference: Taking a model written for JAX and deploying it either on a server using TF Serving, on-device using TFLite, or on the web using TensorFlow.js. \n", + "\n", + "* Fine-tuning: Taking a model that was trained using JAX, you can bring its components to TF using JAX2TF, and continue training it in TensorFlow with your existing training data and setup.\n", + "\n", + "* Fusion: Combining parts of models that were trained using JAX with those trained using TensorFlow, for maximum flexibility.\n", + "\n", + "The key to enabling this kind of interoperation between JAX and TensorFlow is `jax2tf.convert`, which takes in model components created on top of JAX (your loss function, prediction function, etc) and creates equivalent representations of them as TensorFlow functions, which can then be exported as a TensorFlow SavedModel." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G6rtu96yOepm" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9yqxfHzr0LPF" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import jax\n", + "import jax.numpy as jnp\n", + "import flax\n", + "import optax\n", + "import os\n", + "from matplotlib import pyplot as plt\n", + "from jax.experimental import jax2tf\n", + "from threading import Lock # Only used in the visualization utility.\n", + "from functools import partial" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SDnTaZO0r872" + }, + "outputs": [], + "source": [ + "# Needed for TensorFlow and JAX to coexist in GPU memory.\n", + "os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = \"false\"\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " try:\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + " except RuntimeError as e:\n", + " # Memory growth must be set before GPUs have been initialized.\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BXOjCNJxDLil" + }, + "outputs": [], + "source": [ + "#@title Visualization utilities\n", + "\n", + "plt.rcParams[\"figure.figsize\"] = (20,8)\n", + "\n", + "# The utility for displaying training and validation curves.\n", + "def display_train_curves(loss, avg_loss, eval_loss, eval_accuracy, epochs, steps_per_epochs, ignore_first_n=10):\n", + "\n", + " ignore_first_n_epochs = int(ignore_first_n/steps_per_epochs)\n", + "\n", + " # The losses.\n", + " ax = plt.subplot(121)\n", + " if loss is not None:\n", + " x = np.arange(len(loss)) / steps_per_epochs #* epochs\n", + " ax.plot(x, loss)\n", + " ax.plot(range(1, epochs+1), avg_loss, \"-o\", linewidth=3)\n", + " ax.plot(range(1, epochs+1), eval_loss, \"-o\", linewidth=3)\n", + " ax.set_title('Loss')\n", + " ax.set_ylabel('loss')\n", + " ax.set_xlabel('epoch')\n", + " if loss is not None:\n", + " ax.set_ylim(0, np.max(loss[ignore_first_n:]))\n", + " ax.legend(['train', 'avg train', 'eval'])\n", + " else:\n", + " ymin = np.min(avg_loss[ignore_first_n_epochs:])\n", + " ymax = np.max(avg_loss[ignore_first_n_epochs:])\n", + " ax.set_ylim(ymin-(ymax-ymin)/10, ymax+(ymax-ymin)/10)\n", + " ax.legend(['avg train', 'eval'])\n", + "\n", + " # The accuracy.\n", + " ax = plt.subplot(122)\n", + " ax.set_title('Eval Accuracy')\n", + " ax.set_ylabel('accuracy')\n", + " ax.set_xlabel('epoch')\n", + " ymin = np.min(eval_accuracy[ignore_first_n_epochs:])\n", + " ymax = np.max(eval_accuracy[ignore_first_n_epochs:])\n", + " ax.set_ylim(ymin-(ymax-ymin)/10, ymax+(ymax-ymin)/10)\n", + " ax.plot(range(1, epochs+1), eval_accuracy, \"-o\", linewidth=3)\n", + "\n", + "class Progress:\n", + " \"\"\"Text mode progress bar.\n", + " Usage:\n", + " p = Progress(30)\n", + " p.step()\n", + " p.step()\n", + " p.step(reset=True) # to restart form 0%\n", + " The progress bar displays a new header at each restart.\"\"\"\n", + " def __init__(self, maxi, size=100, msg=\"\"):\n", + " \"\"\"\n", + " :param maxi: the number of steps required to reach 100%\n", + " :param size: the number of characters taken on the screen by the progress bar\n", + " :param msg: the message displayed in the header of the progress bar\n", + " \"\"\"\n", + " self.maxi = maxi\n", + " self.p = self.__start_progress(maxi)() # `()`: to get the iterator from the 
generator.\n", + " self.header_printed = False\n", + " self.msg = msg\n", + " self.size = size\n", + " self.lock = Lock()\n", + "\n", + " def step(self, reset=False):\n", + " with self.lock:\n", + " if reset:\n", + " self.__init__(self.maxi, self.size, self.msg)\n", + " if not self.header_printed:\n", + " self.__print_header()\n", + " next(self.p)\n", + "\n", + " def __print_header(self):\n", + " print()\n", + " format_string = \"0%{: ^\" + str(self.size - 6) + \"}100%\"\n", + " print(format_string.format(self.msg))\n", + " self.header_printed = True\n", + "\n", + " def __start_progress(self, maxi):\n", + " def print_progress():\n", + " # Bresenham's algorithm. Yields the number of dots printed.\n", + " # This will always print 100 dots in max invocations.\n", + " dx = maxi\n", + " dy = self.size\n", + " d = dy - dx\n", + " for x in range(maxi):\n", + " k = 0\n", + " while d >= 0:\n", + " print('=', end=\"\", flush=True)\n", + " k += 1\n", + " d -= dx\n", + " d += dy\n", + " yield k\n", + " # Keep yielding the last result if there are too many steps.\n", + " while True:\n", + " yield k\n", + "\n", + " return print_progress" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6xgS_8nDDIu8" + }, + "source": [ + "## Download and prepare the MNIST dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nbN7rmuF0VFB" + }, + "outputs": [], + "source": [ + "(x_train, train_labels), (x_test, test_labels) = tf.keras.datasets.mnist.load_data()\n", + "\n", + "train_data = tf.data.Dataset.from_tensor_slices((x_train, train_labels))\n", + "train_data = train_data.map(lambda x,y: (tf.expand_dims(tf.cast(x, tf.float32)/255.0, axis=-1),\n", + " tf.one_hot(y, depth=10)))\n", + "\n", + "BATCH_SIZE = 256\n", + "train_data = train_data.batch(BATCH_SIZE, drop_remainder=True)\n", + "train_data = train_data.cache()\n", + "train_data = train_data.shuffle(5000, reshuffle_each_iteration=True)\n", + "\n", + "test_data = tf.data.Dataset.from_tensor_slices((x_test, test_labels))\n", + "test_data = test_data.map(lambda x,y: (tf.expand_dims(tf.cast(x, tf.float32)/255.0, axis=-1),\n", + " tf.one_hot(y, depth=10)))\n", + "test_data = test_data.batch(10000)\n", + "test_data = test_data.cache()\n", + "\n", + "(one_batch, one_batch_labels) = next(iter(train_data)) # just one batch\n", + "(all_test_data, all_test_labels) = next(iter(test_data)) # all in one batch since batch size is 10000" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LuZTo7SM3W_n" + }, + "source": [ + "## Configure training\n", + "This notebook will create and train a simple model for demonstration purposes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3vbKB4yZ3aTL" + }, + "outputs": [], + "source": [ + "# Training hyperparameters.\n", + "JAX_EPOCHS = 3\n", + "TF_EPOCHS = 7\n", + "STEPS_PER_EPOCH = len(train_labels)//BATCH_SIZE\n", + "LEARNING_RATE = 0.01\n", + "LEARNING_RATE_EXP_DECAY = 0.6\n", + "\n", + "# The learning rate schedule for JAX (with Optax).\n", + "jlr_decay = optax.exponential_decay(LEARNING_RATE, transition_steps=STEPS_PER_EPOCH, decay_rate=LEARNING_RATE_EXP_DECAY, staircase=True)\n", + "\n", + "# THe learning rate schedule for TensorFlow.\n", + "tflr_decay = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=LEARNING_RATE, decay_steps=STEPS_PER_EPOCH, decay_rate=LEARNING_RATE_EXP_DECAY, staircase=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Od3sMwQxtC34" + }, + "source": [ + "## Create the model using Flax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-ybqQF2zd2QX" + }, + "outputs": [], + "source": [ + "class ConvModel(flax.linen.Module):\n", + "\n", + " @flax.linen.compact\n", + " def __call__(self, x, train):\n", + " x = flax.linen.Conv(features=12, kernel_size=(3,3), padding=\"SAME\", use_bias=False)(x)\n", + " x = flax.linen.BatchNorm(use_running_average=not train, use_scale=False, use_bias=True)(x)\n", + " x = x.reshape((x.shape[0], -1)) # flatten\n", + " x = flax.linen.Dense(features=200, use_bias=True)(x)\n", + " x = flax.linen.BatchNorm(use_running_average=not train, use_scale=False, use_bias=True)(x)\n", + " x = flax.linen.Dropout(rate=0.3, deterministic=not train)(x)\n", + " x = flax.linen.relu(x)\n", + " x = flax.linen.Dense(features=10)(x)\n", + " #x = flax.linen.log_softmax(x)\n", + " return x\n", + "\n", + " # JAX differentiation requires a function `f(params, other_state, data, labels)` -> `loss` (as a single number).\n", + " # `jax.grad` will differentiate it against the fist argument.\n", + " # The user must split trainable and non-trainable variables into `params` and `other_state`.\n", + " # Must pass a different RNG key each time for the dropout mask to be different.\n", + " def loss(self, params, other_state, rng, data, labels, train):\n", + " logits, batch_stats = self.apply({'params': params, **other_state},\n", + " data,\n", + " mutable=['batch_stats'],\n", + " rngs={'dropout': rng},\n", + " train=train)\n", + " # The loss averaged across the batch dimension.\n", + " loss = optax.softmax_cross_entropy(logits, labels).mean()\n", + " return loss, batch_stats\n", + "\n", + " def predict(self, state, data):\n", + " logits = self.apply(state, data, train=False) # predict and accuracy disable dropout and use accumulated batch norm stats (train=False)\n", + " probabilities = flax.linen.log_softmax(logits)\n", + " return probabilities\n", + "\n", + " def accuracy(self, state, data, labels):\n", + " probabilities = self.predict(state, data)\n", + " predictions = jnp.argmax(probabilities, axis=-1)\n", + " dense_labels = jnp.argmax(labels, axis=-1)\n", + " accuracy = jnp.equal(predictions, dense_labels).mean()\n", + " return accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Cr0FRNFtHN4" + }, + "source": [ + "## Write the training step function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tmDwApcpgZzw" + }, + "outputs": [], + "source": [ + "# The training step.\n", + "@partial(jax.jit, static_argnums=[0]) # this forces jax.jit to recompile for every new model\n", + 
"def train_step(model, state, optimizer_state, rng, data, labels):\n", + "\n", + " other_state, params = state.pop('params') # differentiate only against 'params' which represents trainable variables\n", + " (loss, batch_stats), grads = jax.value_and_grad(model.loss, has_aux=True)(params, other_state, rng, data, labels, train=True)\n", + "\n", + " updates, optimizer_state = optimizer.update(grads, optimizer_state)\n", + " params = optax.apply_updates(params, updates)\n", + " new_state = state.copy(add_or_replace={**batch_stats, 'params': params})\n", + "\n", + " rng, _ = jax.random.split(rng)\n", + "\n", + " return new_state, optimizer_state, rng, loss" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zr16g6NzV4O9" + }, + "source": [ + "## Write the training loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zbl5w-KUV7Qw" + }, + "outputs": [], + "source": [ + "def train(model, state, optimizer_state, train_data, epochs, losses, avg_losses, eval_losses, eval_accuracies):\n", + " p = Progress(STEPS_PER_EPOCH)\n", + " rng = jax.random.PRNGKey(0)\n", + " for epoch in range(epochs):\n", + "\n", + " # This is where the learning rate schedule state is stored in the optimizer state.\n", + " optimizer_step = optimizer_state[1].count\n", + "\n", + " # Run an epoch of training.\n", + " for step, (data, labels) in enumerate(train_data):\n", + " p.step(reset=(step==0))\n", + " state, optimizer_state, rng, loss = train_step(model, state, optimizer_state, rng, data.numpy(), labels.numpy())\n", + " losses.append(loss)\n", + " avg_loss = np.mean(losses[-step:])\n", + " avg_losses.append(avg_loss)\n", + "\n", + " # Run one epoch of evals (10,000 test images in a single batch).\n", + " other_state, params = state.pop('params')\n", + " # Gotcha: must discard modified batch_stats here\n", + " eval_loss, _ = model.loss(params, other_state, rng, all_test_data.numpy(), all_test_labels.numpy(), train=False)\n", + " eval_losses.append(eval_loss)\n", + " eval_accuracy = model.accuracy(state, all_test_data.numpy(), all_test_labels.numpy())\n", + " eval_accuracies.append(eval_accuracy)\n", + "\n", + " print(\"\\nEpoch\", epoch, \"train loss:\", avg_loss, \"eval loss:\", eval_loss, \"eval accuracy\", eval_accuracy, \"lr:\", jlr_decay(optimizer_step))\n", + "\n", + " return state, optimizer_state" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DGB3W5g0Wt1H" + }, + "source": [ + "## Create the model and the optimizer (with Optax)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mW5mkmCWtN8W" + }, + "outputs": [], + "source": [ + "# The model.\n", + "model = ConvModel()\n", + "state = model.init({'params':jax.random.PRNGKey(0), 'dropout':jax.random.PRNGKey(0)}, one_batch, train=True) # Flax allows a separate RNG for \"dropout\"\n", + "\n", + "# The optimizer.\n", + "optimizer = optax.adam(learning_rate=jlr_decay) # Gotcha: it does not seem to be possible to pass just a callable as LR, must be an Optax Schedule\n", + "optimizer_state = optimizer.init(state['params'])\n", + "\n", + "losses=[]\n", + "avg_losses=[]\n", + "eval_losses=[]\n", + "eval_accuracies=[]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FJdsKghBNF" + }, + "source": [ + "## Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nmcofTTBZSIb" + }, + "outputs": [], + "source": [ + "new_state, new_optimizer_state = train(model, state, optimizer_state, train_data, 
JAX_EPOCHS+TF_EPOCHS, losses, avg_losses, eval_losses, eval_accuracies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n_20vgvDXB5r" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=1*STEPS_PER_EPOCH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0lT3cdENCBzL" + }, + "source": [ + "## Partially train the model\n", + "\n", + "You will continue training the model in TensorFlow shortly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KT-xqj5N7C6L" + }, + "outputs": [], + "source": [ + "model = ConvModel()\n", + "state = model.init({'params':jax.random.PRNGKey(0), 'dropout':jax.random.PRNGKey(0)}, one_batch, train=True) # Flax allows a separate RNG for \"dropout\"\n", + "\n", + "# The optimizer.\n", + "optimizer = optax.adam(learning_rate=jlr_decay) # LR must be an Optax LR Schedule\n", + "optimizer_state = optimizer.init(state['params'])\n", + "\n", + "losses, avg_losses, eval_losses, eval_accuracies = [], [], [], []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oa362HMDbzDE" + }, + "outputs": [], + "source": [ + "state, optimizer_state = train(model, state, optimizer_state, train_data, JAX_EPOCHS, losses, avg_losses, eval_losses, eval_accuracies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0IyZtUPPCt0y" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=1*STEPS_PER_EPOCH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uNtlSaOCCumB" + }, + "source": [ + "## Save just enough for inference\n", + "\n", + "If your goal is to deploy your JAX model (so you can run inference using `model.predict()`), simply exporting it to [SavedModel](https://www.tensorflow.org/guide/saved_model) is sufficient. This section demonstrates how to accomplish that." 
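Before the notebook's own export cell, here is a stripped-down sketch of the same idea under simplifying assumptions: a made-up one-layer prediction function stands in for the Flax model, and the parameter shapes and save path are illustrative only. The notebook's full version, using the real model state, follows.

```python
# A minimal sketch of exporting a JAX function for TensorFlow inference.
# `jax_predict`, its parameter shapes, and the save path are hypothetical.
import jax.numpy as jnp
import tensorflow as tf
from jax.experimental import jax2tf

def jax_predict(params, x):
  # Stand-in prediction function: a single dense layer written in pure JAX.
  return jnp.dot(x, params["w"]) + params["b"]

params = {"w": jnp.ones((3, 2)), "b": jnp.zeros(2)}

# Create the TensorFlow equivalent of the JAX function. `polymorphic_shapes`
# keeps the batch dimension `b` dynamic, as in the notebook's own export cell.
tf_predict = jax2tf.convert(jax_predict, polymorphic_shapes=["...", "(b, 3)"])

# Wrap the parameters in `tf.Variable`s and the function in a `tf.Module`.
module = tf.Module()
module.params = tf.nest.map_structure(tf.Variable, params)
module.predict = tf.function(
    lambda x: tf_predict(module.params, x),
    input_signature=[tf.TensorSpec(shape=(None, 3), dtype=tf.float32)],
    autograph=False)

print(module.predict(tf.ones((1, 3))))  # TensorFlow now runs the JAX computation.
tf.saved_model.save(module, "/tmp/minimal_jax_module")  # Hypothetical export path.
```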
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O653B3-5H8FL" + }, + "outputs": [], + "source": [ + "# Test data with a different batch size to test polymorphic shapes.\n", + "x, y = next(iter(train_data.unbatch().batch(13)))\n", + "\n", + "m = tf.Module()\n", + "# Wrap the JAX state in `tf.Variable` (needed when calling the converted JAX function.\n", + "state_vars = tf.nest.map_structure(tf.Variable, state)\n", + "# Keep the wrapped state as flat list (needed in TensorFlow fine-tuning).\n", + "m.vars = tf.nest.flatten(state_vars)\n", + "# Convert the desired JAX function (`model.predict`).\n", + "predict_fn = jax2tf.convert(model.predict, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\"])\n", + "# Wrap the converted function in `tf.function` with the correct `tf.TensorSpec` (necessary for dynamic shapes to work).\n", + "@tf.function(autograph=False, input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)])\n", + "def predict(data):\n", + " return predict_fn(state_vars, data)\n", + "m.predict = predict\n", + "tf.saved_model.save(m, \"./\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8HFx67zStgvo" + }, + "outputs": [], + "source": [ + "# Test the converted function.\n", + "print(\"Converted function predictions:\", np.argmax(m.predict(x).numpy(), axis=-1))\n", + "# Reload the model.\n", + "reloaded_model = tf.saved_model.load(\"./\")\n", + "# Test the reloaded converted function (the result should be the same).\n", + "print(\"Reloaded function predictions:\", np.argmax(reloaded_model.predict(x).numpy(), axis=-1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eEk8wv4HJu94" + }, + "source": [ + "## Save everything\n", + "If your goal is a comprehensive export (useful if you're planning on brining the model into TensorFlow for fine-tuning, fusion, etc), this section demonstrates how to save the model so you can access methods including:\n", + "\n", + " - model.predict\n", + " - model.accuracy\n", + " - model.loss (including train=True/False bool, RNG for dropout and BatchNorm state updates)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9mty52pmvDDp" + }, + "outputs": [], + "source": [ + "from collections import abc\n", + "\n", + "def _fix_frozen(d):\n", + " \"\"\"Changes any mappings (e.g. 
frozendict) back to dict.\"\"\"\n", + " if isinstance(d, list):\n", + " return [_fix_frozen(v) for v in d]\n", + " elif isinstance(d, tuple):\n", + " return tuple(_fix_frozen(v) for v in d)\n", + " elif not isinstance(d, abc.Mapping):\n", + " return d\n", + " d = dict(d)\n", + " for k, v in d.items():\n", + " d[k] = _fix_frozen(v)\n", + " return d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3HEsKNXbCwXw" + }, + "outputs": [], + "source": [ + "class TFModel(tf.Module):\n", + " def __init__(self, state, model):\n", + " super().__init__()\n", + "\n", + " # Special care needed for the train=True/False parameter in the loss\n", + " @jax.jit\n", + " def loss_with_train_bool(state, rng, data, labels, train):\n", + " other_state, params = state.pop('params')\n", + " loss, batch_stats = jax.lax.cond(train,\n", + " lambda state, data, labels: model.loss(params, other_state, rng, data, labels, train=True),\n", + " lambda state, data, labels: model.loss(params, other_state, rng, data, labels, train=False),\n", + " state, data, labels)\n", + " # must use JAX to split the RNG, therefore, must do it in a @jax.jit function\n", + " new_rng, _ = jax.random.split(rng)\n", + " return loss, batch_stats, new_rng\n", + "\n", + " self.state_vars = tf.nest.map_structure(tf.Variable, state)\n", + " self.vars = tf.nest.flatten(self.state_vars)\n", + " self.jax_rng = tf.Variable(jax.random.PRNGKey(0))\n", + "\n", + " self.loss_fn = jax2tf.convert(loss_with_train_bool, polymorphic_shapes=[\"...\", \"...\", \"(b, 28, 28, 1)\", \"(b, 10)\", \"...\"])\n", + " self.accuracy_fn = jax2tf.convert(model.accuracy, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\", \"(b, 10)\"])\n", + " self.predict_fn = jax2tf.convert(model.predict, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\"])\n", + "\n", + " # Must specify TensorSpec manually for variable batch size to work\n", + " @tf.function(autograph=False, input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)])\n", + " def predict(self, data):\n", + " # Make sure the TfModel.predict function implicitly use self.state_vars and not the JAX state directly\n", + " # otherwise, all model weights would be embedded in the TF graph as constants.\n", + " return self.predict_fn(self.state_vars, data)\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def train_loss(self, data, labels):\n", + " loss, batch_stats, new_rng = self.loss_fn(self.state_vars, self.jax_rng, data, labels, True)\n", + " # update batch norm stats\n", + " flat_vars = tf.nest.flatten(self.state_vars['batch_stats'])\n", + " flat_values = tf.nest.flatten(batch_stats['batch_stats'])\n", + " for var, val in zip(flat_vars, flat_values):\n", + " var.assign(val)\n", + " # update RNG\n", + " self.jax_rng.assign(new_rng)\n", + " return loss\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def eval_loss(self, data, labels):\n", + " loss, batch_stats, new_rng = self.loss_fn(self.state_vars, self.jax_rng, data, labels, False)\n", + " return loss\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def accuracy(self, data, labels):\n", + " 
return self.accuracy_fn(self.state_vars, data, labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "znJrAVpcxO9u" + }, + "outputs": [], + "source": [ + "# Instantiate the model.\n", + "tf_model = TFModel(state, model)\n", + "\n", + "# Save the model.\n", + "tf.saved_model.save(tf_model, \"./\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y02DHEwTjNzV" + }, + "source": [ + "## Reload the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i75yS3v2jPpM" + }, + "outputs": [], + "source": [ + "reloaded_model = tf.saved_model.load(\"./\")\n", + "\n", + "# Test if it works and that the batch size is indeed variable.\n", + "x,y = next(iter(train_data.unbatch().batch(13)))\n", + "print(np.argmax(reloaded_model.predict(x).numpy(), axis=-1))\n", + "x,y = next(iter(train_data.unbatch().batch(20)))\n", + "print(np.argmax(reloaded_model.predict(x).numpy(), axis=-1))\n", + "\n", + "print(reloaded_model.accuracy(one_batch, one_batch_labels))\n", + "print(reloaded_model.accuracy(all_test_data, all_test_labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DiwEAwQmlx1x" + }, + "source": [ + "## Continue training the converted JAX model in TensorFlow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MubFcO_jl2vE" + }, + "outputs": [], + "source": [ + "optimizer = tf.keras.optimizers.Adam(learning_rate=tflr_decay)\n", + "\n", + "# Set the iteration step for the learning rate to resume from where it left off in JAX.\n", + "optimizer.iterations.assign(len(eval_losses)*STEPS_PER_EPOCH)\n", + "\n", + "p = Progress(STEPS_PER_EPOCH)\n", + "\n", + "for epoch in range(JAX_EPOCHS, JAX_EPOCHS+TF_EPOCHS):\n", + "\n", + " # This is where the learning rate schedule state is stored in the optimizer state.\n", + " optimizer_step = optimizer.iterations\n", + "\n", + " for step, (data, labels) in enumerate(train_data):\n", + " p.step(reset=(step==0))\n", + " with tf.GradientTape() as tape:\n", + " #loss = reloaded_model.loss(data, labels, True)\n", + " loss = reloaded_model.train_loss(data, labels)\n", + " grads = tape.gradient(loss, reloaded_model.vars)\n", + " optimizer.apply_gradients(zip(grads, reloaded_model.vars))\n", + " losses.append(loss)\n", + " avg_loss = np.mean(losses[-step:])\n", + " avg_losses.append(avg_loss)\n", + "\n", + " eval_loss = reloaded_model.eval_loss(all_test_data.numpy(), all_test_labels.numpy()).numpy()\n", + " eval_losses.append(eval_loss)\n", + " eval_accuracy = reloaded_model.accuracy(all_test_data.numpy(), all_test_labels.numpy()).numpy()\n", + " eval_accuracies.append(eval_accuracy)\n", + "\n", + " print(\"\\nEpoch\", epoch, \"train loss:\", avg_loss, \"eval loss:\", eval_loss, \"eval accuracy\", eval_accuracy, \"lr:\", tflr_decay(optimizer.iterations).numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50V1FSmI6UTk" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=2*STEPS_PER_EPOCH)\n", + "\n", + "# The loss takes a hit when the training restarts, but does not go back to random levels.\n", + "# This is likely caused by the optimizer momentum being reinitialized." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L7lSziW0K0ny" + }, + "source": [ + "## Next steps\n", + "You can learn more about [JAX](https://jax.readthedocs.io/en/latest/index.html) and [Flax](https://flax.readthedocs.io/en/latest) on their documentation websites which contain detailed guides and examples. If you're new to JAX, be sure to explore the [JAX 101 tutorials](https://jax.readthedocs.io/en/latest/jax-101/index.html), and check out the [Flax quickstart](https://flax.readthedocs.io/en/latest/getting_started.html). To learn more about converting JAX models to TensorFlow format, check out the [jax2tf](https://github.com/google/jax/tree/main/jax/experimental/jax2tf) utility on GitHub. If you're interested in converting JAX models to run in the browser with TensorFlow.js, visit [JAX on the Web with TensorFlow.js](https://blog.tensorflow.org/2022/08/jax-on-web-with-tensorflowjs.html). If you'd like to prepare JAX models to run in TensorFLow Lite, visit the [JAX Model Conversion For TFLite](https://www.tensorflow.org/lite/examples/jax_conversion/overview) guide." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "jax2tf.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/keras.md b/site/en/guide/keras.md new file mode 100644 index 00000000000..3dee7be3aa1 --- /dev/null +++ b/site/en/guide/keras.md @@ -0,0 +1,135 @@ +# Keras: The high-level API for TensorFlow + +Keras is the high-level API of the TensorFlow platform. It provides an +approachable, highly-productive interface for solving machine learning (ML) +problems, with a focus on modern deep learning. Keras covers every step of the +machine learning workflow, from data processing to hyperparameter tuning to +deployment. It was developed with a focus on enabling fast experimentation. + +With Keras, you have full access to the scalability and cross-platform +capabilities of TensorFlow. You can run Keras on a TPU Pod or large clusters of +GPUs, and you can export Keras models to run in the browser or on mobile +devices. You can also serve Keras models via a web API. + +Keras is designed to reduce cognitive load by achieving the following goals: + +* Offer simple, consistent interfaces. +* Minimize the number of actions required for common use cases. +* Provide clear, actionable error messages. +* Follow the principle of progressive disclosure of complexity: It's easy to get + started, and you can complete advanced workflows by learning as you go. +* Help you write concise, readable code. + +## Who should use Keras + +The short answer is that every TensorFlow user should use the Keras APIs by +default. Whether you're an engineer, a researcher, or an ML practitioner, you +should start with Keras. + +There are a few use cases (for example, building tools on top of TensorFlow or +developing your own high-performance platform) that require the low-level +[TensorFlow Core APIs](https://www.tensorflow.org/guide/core). But if your use +case doesn't fall into one +of the +[Core API applications](https://www.tensorflow.org/guide/core#core_api_applications), +you should prefer Keras. + +## Keras API components + +The core data structures of Keras are [layers](https://keras.io/api/layers/) and +[models](https://keras.io/api/models/). A layer is a simple input/output +transformation, and a model is a directed acyclic graph (DAG) of layers. 
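As a minimal sketch of these two building blocks (the layer sizes and the random input batch below are arbitrary choices for illustration, not part of any recommended architecture):

```python
import tensorflow as tf

# A layer is an input/output transformation that can hold trainable weights.
dense = tf.keras.layers.Dense(units=4, activation="relu")

# A model groups layers into a graph; the simplest case is a linear stack.
model = tf.keras.Sequential([
    dense,
    tf.keras.layers.Dense(1),
])

batch = tf.random.normal((2, 8))  # An arbitrary toy batch: 2 samples, 8 features.
print(dense(batch).shape)   # The layer alone: (2, 4).
print(model(batch).shape)   # The full stack of layers: (2, 1).
```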
+ +### Layers + +The `tf.keras.layers.Layer` class is the fundamental abstraction in Keras. A +`Layer` encapsulates a state (weights) and some computation (defined in the +`tf.keras.layers.Layer.call` method). + +Weights created by layers can be trainable or non-trainable. Layers are +recursively composable: If you assign a layer instance as an attribute of +another layer, the outer layer will start tracking the weights created by the +inner layer. + +You can also use layers to handle data preprocessing tasks like normalization +and text vectorization. Preprocessing layers can be included directly into a +model, either during or after training, which makes the model portable. + +### Models + +A model is an object that groups layers together and that can be trained on +data. + +The simplest type of model is the +[`Sequential` model](https://www.tensorflow.org/guide/keras/sequential_model), +which is a linear stack of layers. For more complex architectures, you can +either use the +[Keras functional API](https://www.tensorflow.org/guide/keras/functional_api), +which lets you build arbitrary graphs of layers, or +[use subclassing to write models from scratch](https://www.tensorflow.org/guide/keras/making_new_layers_and_models_via_subclassing). + +The `tf.keras.Model` class features built-in training and evaluation methods: + +* `tf.keras.Model.fit`: Trains the model for a fixed number of epochs. +* `tf.keras.Model.predict`: Generates output predictions for the input samples. +* `tf.keras.Model.evaluate`: Returns the loss and metrics values for the model; + configured via the `tf.keras.Model.compile` method. + +These methods give you access to the following built-in training features: + +* [Callbacks](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks). + You can leverage built-in callbacks for early stopping, model checkpointing, + and [TensorBoard](https://www.tensorflow.org/tensorboard) monitoring. You can + also + [implement custom callbacks](https://www.tensorflow.org/guide/keras/writing_your_own_callbacks). +* [Distributed training](https://www.tensorflow.org/guide/keras/distributed_training). + You can easily scale up your training to multiple GPUs, TPUs, or devices. +* Step fusing. With the `steps_per_execution` argument in + `tf.keras.Model.compile`, you can process multiple batches in a single + `tf.function` call, which greatly improves device utilization on TPUs. + +For a detailed overview of how to use `fit`, see the +[training and evaluation guide](https://www.tensorflow.org/guide/keras/training_with_built_in_methods). +To learn how to customize the built-in training and evaluation loops, see +[Customizing what happens in `fit()`](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit). + +### Other APIs and tools + +Keras provides many other APIs and tools for deep learning, including: + +* [Optimizers](https://keras.io/api/optimizers/) +* [Metrics](https://keras.io/api/metrics/) +* [Losses](https://keras.io/api/losses/) +* [Data loading utilities](https://keras.io/api/data_loading/) + +For a full list of available APIs, see the +[Keras API reference](https://keras.io/api/). To learn more about other Keras +projects and initiatives, see +[The Keras ecosystem](https://keras.io/getting_started/ecosystem/). 
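Tying the above together, the following sketch shows the built-in workflow this section describes: configure with `compile`, train with `fit` (here with a built-in callback attached), then `evaluate` and `predict`. The architecture, hyperparameters, and random data are placeholders chosen only for illustration.

```python
import numpy as np
import tensorflow as tf

# Placeholder data: 256 random samples with 8 features and a scalar target.
x = np.random.rand(256, 8).astype("float32")
y = np.random.rand(256, 1).astype("float32")

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(1),
])

# compile() configures the optimizer, loss, and metrics used by the built-in loops.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# fit() trains for a fixed number of epochs; callbacks hook into the training loop.
model.fit(x, y, epochs=2, batch_size=32,
          callbacks=[tf.keras.callbacks.EarlyStopping(monitor="loss", patience=1)])

# evaluate() returns the loss and metric values; predict() generates outputs.
loss, mae = model.evaluate(x, y, verbose=0)
predictions = model.predict(x[:5])
```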
+ +## Next steps + +To get started using Keras with TensorFlow, check out the following topics: + +* [The Sequential model](https://www.tensorflow.org/guide/keras/sequential_model) +* [The Functional API](https://www.tensorflow.org/guide/keras/functional) +* [Training & evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/training_with_built_in_methods) +* [Making new layers and models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models) +* [Serialization and saving](https://www.tensorflow.org/guide/keras/save_and_serialize) +* [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) +* [Customizing what happens in fit()](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) +* [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) +* [Working with RNNs](https://www.tensorflow.org/guide/keras/rnn) +* [Understanding masking & padding](https://www.tensorflow.org/guide/keras/masking_and_padding) +* [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback) +* [Transfer learning & fine-tuning](https://www.tensorflow.org/guide/keras/transfer_learning) +* [Multi-GPU and distributed training](https://www.tensorflow.org/guide/keras/distributed_training) + +To learn more about Keras, see the following topics at +[keras.io](http://keras.io): + +* [About Keras](https://keras.io/about/) +* [Introduction to Keras for Engineers](https://keras.io/getting_started/intro_to_keras_for_engineers/) +* [Introduction to Keras for Researchers](https://keras.io/getting_started/intro_to_keras_for_researchers/) +* [Keras API reference](https://keras.io/api/) +* [The Keras ecosystem](https://keras.io/getting_started/ecosystem/) \ No newline at end of file diff --git a/site/en/guide/migrate/_toc.yaml b/site/en/guide/migrate/_toc.yaml index efad54a108f..ceb7e5f57ae 100644 --- a/site/en/guide/migrate/_toc.yaml +++ b/site/en/guide/migrate/_toc.yaml @@ -65,7 +65,7 @@ toc: - heading: Validate model quality and performance - title: Validate correctness and numerical equivalence path: /guide/migrate/validate_correctness -# - title: Quality testing and debugging -# path: /guide/migrate/testing_debugging +- title: Debug TF2 Migrated Training Pipeline + path: /guide/migrate/migration_debugging # - title: Testing speed and throughput # path: /guide/migrate/throughput_testing diff --git a/site/en/guide/migrate/canned_estimators.ipynb b/site/en/guide/migrate/canned_estimators.ipynb index d19b2a6cae0..68859511a84 100644 --- a/site/en/guide/migrate/canned_estimators.ipynb +++ b/site/en/guide/migrate/canned_estimators.ipynb @@ -37,7 +37,7 @@ "id": "77z2OchJTk0l" }, "source": [ - "# Migration Examples: Canned Estimators\n", + "# Migration examples: Canned Estimators\n", "\n", "\n", " - @@ -43,12 +44,12 @@ unzip: cannot find zipfile directory in one of ./bazel-bin/tensorflow/tools/pip_ No such file or directory - + + +CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h. 
@@ -60,7 +61,7 @@ unzip: cannot find zipfile directory in one of ./bazel-bin/tensorflow/tools/pip_ @@ -111,7 +112,7 @@ ImportError: cannot import name 'descriptor' @@ -226,7 +227,7 @@ ImportError: cannot import name 'descriptor' diff --git a/site/en/install/gpu.md b/site/en/install/gpu.md deleted file mode 100644 index 2879873189d..00000000000 --- a/site/en/install/gpu.md +++ /dev/null @@ -1,188 +0,0 @@ -# GPU support - -Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. - -TensorFlow GPU support requires an assortment of drivers and libraries. To -simplify installation and avoid library conflicts, we recommend using a -[TensorFlow Docker image with GPU support](./docker.md) (Linux only). This setup -only requires the [NVIDIA® GPU drivers](https://www.nvidia.com/drivers){:.external}. - -These install instructions are for the latest release of TensorFlow. See the -[tested build configurations](./source.md#gpu) for CUDA® and cuDNN versions to -use with older TensorFlow releases. - -## Pip package - -See the [pip install guide](./pip) for available packages, systems requirements, -and instructions. The TensorFlow `pip` package includes GPU support for -CUDA®-enabled cards: - -
-pip install tensorflow
-
- -This guide covers GPU support and installation steps for the latest *stable* -TensorFlow release. - -### Older versions of TensorFlow - -For releases 1.15 and older, CPU and GPU packages are separate: - -
-pip install tensorflow==1.15      # CPU
-pip install tensorflow-gpu==1.15  # GPU
-
- -## Hardware requirements - -The following GPU-enabled devices are supported: - -* NVIDIA® GPU card with CUDA® architectures 3.5, 5.0, 6.0, 7.0, 7.5, 8.0 and - higher than 8.0. See the list of - CUDA®-enabled - GPU cards. -* For GPUs with unsupported CUDA® architectures, or to avoid JIT compilation - from PTX, or to use different versions of the NVIDIA® libraries, see the - [Linux build from source](./source.md) guide. -* Packages do not contain PTX code except for the latest supported CUDA® - architecture; therefore, TensorFlow fails to load on older GPUs when - `CUDA_FORCE_PTX_JIT=1` is set. (See - Application - Compatibility for details.) - -Note: The error message "Status: device kernel image is invalid" indicates that -the TensorFlow package does not contain PTX for your architecture. You can -enable compute capabilities by [building TensorFlow from source](./source.md). - -## Software requirements - -The following NVIDIA® software must be installed on your system: - -* [NVIDIA® GPU drivers](https://www.nvidia.com/drivers){:.external} —CUDA® - 11.2 requires 450.80.02 or higher. -* [CUDA® Toolkit](https://developer.nvidia.com/cuda-toolkit-archive){:.external} - —TensorFlow supports CUDA® 11.2 (TensorFlow >= 2.5.0) -* [CUPTI](http://docs.nvidia.com/cuda/cupti/){:.external} ships with the CUDA® - Toolkit. -* [cuDNN SDK 8.1.0](https://developer.nvidia.com/cudnn){:.external} - [cuDNN versions](https://developer.nvidia.com/rdp/cudnn-archive){:.external}). -* *(Optional)* - [TensorRT 6.0](https://docs.nvidia.com/deeplearning/tensorrt/archives/index.html#trt_6){:.external} - to improve latency and throughput for inference on some models. - -## Linux setup - -The `apt` instructions below are the easiest way to install the required NVIDIA -software on Ubuntu. However, if [building TensorFlow from source](./source.md), -manually install the software requirements listed above, and consider using a -`-devel` [TensorFlow Docker image](./docker.md) as a base. - -Install [CUPTI](http://docs.nvidia.com/cuda/cupti/){:.external} which ships with -the CUDA® Toolkit. Append its installation directory to the `$LD_LIBRARY_PATH` -environmental variable: - -
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64
-
- -### Install CUDA with apt - -This section shows how to install CUDA® 11 (TensorFlow >= 2.4.0) on Ubuntu -16.04 and 18.04. These instructions may work for other Debian-based distros. - -Caution: [Secure Boot](https://wiki.ubuntu.com/UEFI/SecureBoot){:.external} -complicates installation of the NVIDIA driver and is beyond the scope of these instructions. - - -#### Ubuntu 18.04 (CUDA 11.0) - -
-# Add NVIDIA package repositories
-wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
-sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
-sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
-sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
-sudo apt-get update
-
-wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
-
-sudo apt install ./nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
-sudo apt-get update
-
-wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt install ./libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt-get update
-
-# Install development and runtime libraries (~4GB)
-sudo apt-get install --no-install-recommends \
-    cuda-11-0 \
-    libcudnn8=8.0.4.30-1+cuda11.0  \
-    libcudnn8-dev=8.0.4.30-1+cuda11.0
-
-# Reboot. Check that GPUs are visible using the command: nvidia-smi
-
-# Install TensorRT. Requires that libcudnn8 is installed above.
-sudo apt-get install -y --no-install-recommends libnvinfer7=7.1.3-1+cuda11.0 \
-    libnvinfer-dev=7.1.3-1+cuda11.0 \
-    libnvinfer-plugin7=7.1.3-1+cuda11.0
-
-
- -#### Ubuntu 16.04 (CUDA 11.0) - -
-# Add NVIDIA package repositories
-# Add HTTPS support for apt-key
-sudo apt-get install gnupg-curl
-wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin
-sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600
-sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub
-sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ /"
-sudo apt-get update
-wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
-sudo apt install ./nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
-sudo apt-get update
-wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt install ./libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
-sudo apt-get update
-
-# Install development and runtime libraries (~4GB)
-sudo apt-get install --no-install-recommends \
-    cuda-11-0 \
-    libcudnn8=8.0.4.30-1+cuda11.0  \
-    libcudnn8-dev=8.0.4.30-1+cuda11.0
-
-
-# Reboot. Check that GPUs are visible using the command: nvidia-smi
-
-# Install TensorRT. Requires that libcudnn7 is installed above.
-sudo apt-get install -y --no-install-recommends \
-    libnvinfer7=7.1.3-1+cuda11.0 \
-    libnvinfer-dev=7.1.3-1+cuda11.0 \
-    libnvinfer-plugin7=7.1.3-1+cuda11.0 \
-    libnvinfer-plugin-dev=7.1.3-1+cuda11.0
-
-
- - -## Windows setup - -See the [hardware requirements](#hardware_requirements) and -[software requirements](#software_requirements) listed above. Read the -[CUDA® install guide for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/){:.external}. - -Make sure the installed NVIDIA software packages match the versions listed above. In -particular, TensorFlow will not load without the `cuDNN64_8.dll` file. To use a -different version, see the [Windows build from source](./source_windows.md) guide. - -Add the CUDA®, CUPTI, and cuDNN installation directories to the `%PATH%` -environmental variable. For example, if the CUDA® Toolkit is installed to -`C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0` and cuDNN to -`C:\tools\cuda`, update your `%PATH%` to match: - -
-SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\bin;%PATH%
-SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\extras\CUPTI\lib64;%PATH%
-SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include;%PATH%
-SET PATH=C:\tools\cuda\bin;%PATH%
-
diff --git a/site/en/install/gpu_plugins.md b/site/en/install/gpu_plugins.md index 358db01b312..39e3cf09b29 100644 --- a/site/en/install/gpu_plugins.md +++ b/site/en/install/gpu_plugins.md @@ -1,12 +1,12 @@ # GPU device plugins -Note: This page is for non-NVIDIA® GPU devices. For NVIDIA® GPU support, click -[here](./gpu.md). +Note: This page is for non-NVIDIA® GPU devices. For NVIDIA® GPU support, go to +the [Install TensorFlow with pip](./pip.md) guide. TensorFlow's -pluggable -device architecture adds new device support as separate plug-in packages -that are installed alongside the official TensorFlow package. +[pluggable device](https://github.com/tensorflow/community/blob/master/rfcs/20200624-pluggable-device-for-tensorflow.md) +architecture adds new device support as separate plug-in packages that are +installed alongside the official TensorFlow package. The mechanism requires no device-specific changes in the TensorFlow code. It relies on C APIs to communicate with the TensorFlow binary in a stable manner. @@ -57,6 +57,24 @@ run() # PluggableDevices also work with tf.function and graph mode. Metal `PluggableDevice` for macOS GPUs: -* [Getting started guide](https://developer.apple.com/metal/tensorflow-plugin/){:.external}. +* Works with TF 2.5 or later. +* [Getting started guide](https://developer.apple.com/metal/tensorflow-plugin/). * For questions and feedback, please visit the - [Apple Developer Forum](https://developer.apple.com/forums/tags/tensorflow-metal){:.external}. + [Apple Developer Forum](https://developer.apple.com/forums/tags/tensorflow-metal). + +DirectML `PluggableDevice` for Windows and WSL (preview): + +* Works with `tensorflow-cpu` package, version 2.10 or later. +* [PyPI wheel](https://pypi.org/project/tensorflow-directml-plugin/). +* [GitHub repo](https://github.com/microsoft/tensorflow-directml-plugin). +* For questions, feedback or to raise issues, please visit the + [Issues page of `tensorflow-directml-plugin` on GitHub](https://github.com/microsoft/tensorflow-directml-plugin/issues). + +Intel® Extension for TensorFlow `PluggableDevice` for Linux and WSL: + +* Works with TF 2.10 or later. +* [Getting started guide](https://intel.github.io/intel-extension-for-tensorflow/latest/get_started.html) +* [PyPI wheel](https://pypi.org/project/intel-extension-for-tensorflow/). +* [GitHub repo](https://github.com/intel/intel-extension-for-tensorflow). +* For questions, feedback, or to raise issues, please visit the + [Issues page of `intel-extension-for-tensorflow` on GitHub](https://github.com/intel/intel-extension-for-tensorflow/issues). 
diff --git a/site/en/install/lang_c.ipynb b/site/en/install/lang_c.ipynb new file mode 100644 index 00000000000..788a5e6c891 --- /dev/null +++ b/site/en/install/lang_c.ipynb @@ -0,0 +1,383 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s7Bo2MipUnXX" + }, + "source": [ + "# Install TensorFlow for C" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Birwb-khUOIq" + }, + "source": [ + "
\n", @@ -67,15 +67,15 @@ "source": [ "Canned (or Premade) Estimators have traditionally been used in TensorFlow 1 as quick and easy ways to train models for a variety of typical use cases. TensorFlow 2 provides straightforward approximate substitutes for a number of them by way of Keras models. For those canned estimators that do not have built-in TensorFlow 2 substitutes, you can still build your own replacement fairly easily.\n", "\n", - "This guide walks through a few examples of direct equivalents and custom substitutions to demonstrate how TensorFlow 1's `tf.estimator`-derived models can be migrated to TF2 with Keras.\n", + "This guide will walk you through a few examples of direct equivalents and custom substitutions to demonstrate how TensorFlow 1's `tf.estimator`-derived models can be migrated to TensorFlow 2 with Keras.\n", "\n", "Namely, this guide includes examples for migrating:\n", "* From `tf.estimator`'s `LinearEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to Keras `tf.compat.v1.keras.models.LinearModel` in TensorFlow 2\n", "* From `tf.estimator`'s `DNNEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to a custom Keras DNN ModelKeras in TensorFlow 2\n", "* From `tf.estimator`'s `DNNLinearCombinedEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tf.compat.v1.keras.models.WideDeepModel` in TensorFlow 2\n", - "* From `tf.estimator`'s `BoostedTreesEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tf.compat.v1.keras.models.WideDeepModel` in TensorFlow 2\n", + "* From `tf.estimator`'s `BoostedTreesEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tfdf.keras.GradientBoostedTreesModel` in TensorFlow 2\n", "\n", - "A common precursor to the training of a model is feature preprocessing, which is done for TensorFlow 1 Estimator models with `tf.feature_column`. For more information on feature preprocessing in TensorFlow 2, see [this guide on migrating feature columns](migrating_feature_columns.ipynb)." + "A common precursor to the training of a model is feature preprocessing, which is done for TensorFlow 1 Estimator models with `tf.feature_column`. For more information on feature preprocessing in TensorFlow 2, see [this guide on migrating from feature columns to the Keras preprocessing layers API](migrating_feature_columns.ipynb)." ] }, { @@ -108,11 +108,11 @@ }, "outputs": [], "source": [ - "import keras\n", "import pandas as pd\n", "import tensorflow as tf\n", "import tensorflow.compat.v1 as tf1\n", - "import tensorflow_decision_forests as tfdf\n" + "import tensorflow_decision_forests as tfdf\n", + "from tensorflow import keras\n" ] }, { @@ -183,7 +183,7 @@ "id": "bYSgoezeMrpI" }, "source": [ - "and create a method to instantiate a simplistic sample optimizer to use with our various TensorFlow 1 Estimator and TensorFlow 2 Keras models." + "and create a method to instantiate a simplistic sample optimizer to use with various TensorFlow 1 Estimator and TensorFlow 2 Keras models." 
] }, { @@ -196,7 +196,7 @@ "source": [ "def create_sample_optimizer(tf_version):\n", " if tf_version == 'tf1':\n", - " optimizer = lambda: tf.keras.optimizers.Ftrl(\n", + " optimizer = lambda: tf.keras.optimizers.legacy.Ftrl(\n", " l1_regularization_strength=0.001,\n", " learning_rate=tf1.train.exponential_decay(\n", " learning_rate=0.1,\n", @@ -204,7 +204,7 @@ " decay_steps=10000,\n", " decay_rate=0.9))\n", " elif tf_version == 'tf2':\n", - " optimizer = tf.keras.optimizers.Ftrl(\n", + " optimizer = tf.keras.optimizers.legacy.Ftrl(\n", " l1_regularization_strength=0.001,\n", " learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(\n", " initial_learning_rate=0.1, decay_steps=10000, decay_rate=0.9))\n", @@ -226,7 +226,7 @@ "id": "_O7fyhCnpvED" }, "source": [ - "### TF1: Using LinearEstimator" + "### TensorFlow 1: Using LinearEstimator" ] }, { @@ -270,7 +270,7 @@ "id": "KEmzBjfnsxwT" }, "source": [ - "### TF2: Using Keras LinearModel" + "### TensorFlow 2: Using Keras LinearModel" ] }, { @@ -311,7 +311,7 @@ "id": "YKl6XZ7Bp1t5" }, "source": [ - "### TF1: Using DNNEstimator" + "### TensorFlow 1: Using DNNEstimator" ] }, { @@ -320,7 +320,7 @@ "id": "J7wJUmgypln8" }, "source": [ - "In TensorFlow 1, you can use `tf.estimator.DNNEstimator` to create a baseline DNN model for regression and classification problems." + "In TensorFlow 1, you can use `tf.estimator.DNNEstimator` to create a baseline deep neural network (DNN) model for regression and classification problems." ] }, { @@ -357,7 +357,7 @@ "id": "6xJz6px6pln-" }, "source": [ - "### TF2: Using Keras to Create a Custom DNN Model" + "### TensorFlow 2: Using Keras to create a custom DNN model" ] }, { @@ -368,7 +368,7 @@ "source": [ "In TensorFlow 2, you can create a custom DNN model to substitute for one generated by `tf.estimator.DNNEstimator`, with similar levels of user-specified customization (for instance, as in the previous example, the ability to customize a chosen model optimizer).\n", "\n", - "A similar workflow can be used to replace `tf.estimator.experimental.RNNEstimator` with a Keras RNN Model. Keras provides a number of built-in, customizable choices by way of `tf.keras.layers.RNN`, `tf.keras.layers.LSTM`, and `tf.keras.layers.GRU` - see [here](https://www.tensorflow.org/guide/keras/rnn#built-in_rnn_layers_a_simple_example) for more details." + "A similar workflow can be used to replace `tf.estimator.experimental.RNNEstimator` with a Keras recurrent neural network (RNN) model. Keras provides a number of built-in, customizable choices by way of `tf.keras.layers.RNN`, `tf.keras.layers.LSTM`, and `tf.keras.layers.GRU`. To learn more, check out the _Built-in RNN layers: a simple example_ section of [RNN with Keras guide](https://www.tensorflow.org/guide/keras/rnn)." ] }, { @@ -413,7 +413,7 @@ "id": "GfRaObf5g4TU" }, "source": [ - "### TF1: Using DNNLinearCombinedEstimator" + "### TensorFlow 1: Using DNNLinearCombinedEstimator" ] }, { @@ -464,7 +464,7 @@ "id": "BeMikL5ug4TX" }, "source": [ - "### TF2: Using Keras WideDeepModel" + "### TensorFlow 2: Using Keras WideDeepModel" ] }, { @@ -477,7 +477,7 @@ "\n", "This `WideDeepModel` is constructed on the basis of a constituent `LinearModel` and a custom DNN Model, both of which are discussed in the preceding two examples. 
A custom linear model can also be used in place of the built-in Keras `LinearModel` if desired.\n", "\n", - "If you would like to build your own model instead of a canned estimator, check out [how to build a `keras.Sequential` model](https://www.tensorflow.org/guide/keras/sequential_model). For more information on custom training and optimizers you can also checkout [this guide](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough)." + "If you would like to build your own model instead of using a canned estimator, check out the [Keras Sequential model](https://www.tensorflow.org/guide/keras/sequential_model) guide. For more information on custom training and optimizers, check out the [Custom training: walkthrough](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough) guide." ] }, { @@ -532,7 +532,7 @@ "id": "_3mCQVDSeOKD" }, "source": [ - "### TF1: Using BoostedTreesEstimator" + "### TensorFlow 1: Using BoostedTreesEstimator" ] }, { @@ -541,35 +541,35 @@ "id": "oEWYHNt4eOKD" }, "source": [ - "In TensorFlow 1, you can use `tf.estimator.BoostedTreesEstimator` to create a baseline to create a baseline Gradient Boosting model using an ensemble of decision trees for regression and classification problems." + "In TensorFlow 1, you could use `tf.estimator.BoostedTreesEstimator` to create a baseline Gradient Boosting model using an ensemble of decision trees for regression and classification problems. This functionality is no longer included in TensorFlow 2." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "uBIURLLMeOKE" + "id": "wliVIER1jLnA" }, - "outputs": [], "source": [ + "```\n", "bt_estimator = tf1.estimator.BoostedTreesEstimator(\n", " head=tf.estimator.BinaryClassHead(),\n", " n_batches_per_layer=1,\n", " max_depth=10,\n", " n_trees=1000,\n", - " feature_columns=feature_columns)" + " feature_columns=feature_columns)\n", + "```" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "MUo5omt3eOKE" + "id": "-K87uBrZjR0u" }, - "outputs": [], "source": [ + "```\n", "bt_estimator.train(input_fn=_input_fn, steps=1000)\n", - "bt_estimator.evaluate(input_fn=_eval_input_fn, steps=100)" + "bt_estimator.evaluate(input_fn=_eval_input_fn, steps=100)\n", + "```" ] }, { @@ -578,7 +578,7 @@ "id": "eNuLP6BeeOKF" }, "source": [ - "### TF2: Using TensorFlow Decision Forests" + "### TensorFlow 2: Using TensorFlow Decision Forests" ] }, { @@ -587,40 +587,100 @@ "id": "m3EVq388eOKF" }, "source": [ - "In TensorFlow 2, the closest pre-packaged substitute for a model generated by `tf.estimator.BoostedTreesEstimator` is one created using `tfdf.keras.GradientBoostedTreesModel`, which creates a sequentially-trained sequence of shallow decision trees, each designed to \"learn\" from errors made by its predecessors in the sequence.\n", + "In TensorFlow 2, `tf.estimator.BoostedTreesEstimator` is replaced by [tfdf.keras.GradientBoostedTreesModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/GradientBoostedTreesModel#attributes) from the [TensorFlow Decision Forests](https://www.tensorflow.org/decision_forests) package.\n", "\n", - "`GradientBoostedTreesModel` provides more options for customization, allowing for the specification of everything from basic depth constraints to early stopping conditions. 
See [here](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/GradientBoostedTreesModel#attributes) for more `GradientBoostedTreesModel` attribute details." + "TensorFlow Decision Forests provides various advantages over the `tf.estimator.BoostedTreesEstimator`, notably regarding quality, speed, ease of use and flexibility. To learn about TensorFlow Decision Forests, start with the [beginner colab](https://www.tensorflow.org/decision_forests/tutorials/beginner_colab).\n", + "\n", + "The following example shows how to train a Gradient Boosted Trees model using TensorFlow 2:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UB90fXJdVWC5" + }, + "source": [ + "Install TensorFlow Decision Forests." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "JLS_2vKKeOKF" + "id": "9097mTCIVVE9" }, "outputs": [], "source": [ - "gbt_model = tfdf.keras.GradientBoostedTreesModel(\n", - " task=tfdf.keras.Task.CLASSIFICATION)\n", - "gbt_model.compile(metrics=['mse', 'accuracy'])" + "!pip install tensorflow_decision_forests" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B1qTdAS-VpXk" + }, + "source": [ + "Create a TensorFlow dataset. Note that Decision Forests natively support many types of features and do not need pre-processing." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "sZZSM7_VeOKF" + "id": "jkjFHmDTVswY" }, "outputs": [], "source": [ - "train_df, eval_df = x_train.copy(), x_eval.copy()\n", - "train_df['survived'], eval_df['survived'] = y_train, y_eval\n", - "\n", - "train_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label='survived')\n", - "eval_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(eval_df, label='survived')\n", + "train_dataframe = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", + "eval_dataframe = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", "\n", - "gbt_model.fit(train_dataset)\n", - "gbt_model.evaluate(eval_dataset, return_dict=True)" + "# Convert the Pandas Dataframes into TensorFlow datasets.\n", + "train_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(train_dataframe, label=\"survived\")\n", + "eval_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(eval_dataframe, label=\"survived\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7fPa-LfDWDzB" + }, + "source": [ + "Train the model on the `train_dataset` dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JO0yCH9hWPvJ" + }, + "outputs": [], + "source": [ + "# Use the default hyper-parameters of the model.\n", + "gbt_model = tfdf.keras.GradientBoostedTreesModel()\n", + "gbt_model.fit(train_dataset)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2Y5xm29AWGxt" + }, + "source": [ + "Evaluate the quality of the model on the `eval_dataset` dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JLS_2vKKeOKF" + }, + "outputs": [], + "source": [ + "gbt_model.compile(metrics=['accuracy'])\n", + "gbt_evaluation = gbt_model.evaluate(eval_dataset, return_dict=True)\n", + "print(gbt_evaluation)" ] }, { @@ -629,36 +689,52 @@ "id": "Z22UJ5SUqToQ" }, "source": [ - "In TensorFlow 2, there is also another available TFDF substitute for a model generated by `tf.estimator.BoostedTreesEstimator` - `tfdf.keras.RandomForestModel`. 
`RandomForestModel` creates a robust, overfitting-resistant learner consisting of a voting population of deep decision trees, each trained on random subsets of the input training dataset.\n", - "\n", - "`RandomForestModel` and `GradientBoostedTreesModel` provide similarly extensive levels of customization. Choosing between them is problem-specific and dependent on your task or application.\n", + "Gradient Boosted Trees is just one of the many decision forest algorithms available in TensorFlow Decision Forests. For example, Random Forests (available as [tfdf.keras.RandomForestModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/RandomForestModel)) are very resistant to overfitting, while CART (available as [tfdf.keras.CartModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/CartModel)) is great for model interpretation.\n", "\n", - "Check the API docs for more information on the [`RandomForestModel`](https://https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/RandomForestModel#attributes) and [`GradientBoostedTreesModel`](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/GradientBoostedTreesModel#attributes) attribute." + "In the next example, train and evaluate a Random Forest model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "027bGnCork_W" + "id": "W3slOhn4Zi9X" }, "outputs": [], "source": [ - "rf_model = tfdf.keras.RandomForestModel(\n", - " task=tfdf.keras.Task.CLASSIFICATION)\n", - "rf_model.compile(metrics=['mse', 'accuracy'])" + "# Train a Random Forest model\n", + "rf_model = tfdf.keras.RandomForestModel()\n", + "rf_model.fit(train_dataset)\n", + "\n", + "# Evaluate the Random Forest model\n", + "rf_model.compile(metrics=['accuracy'])\n", + "rf_evaluation = rf_model.evaluate(eval_dataset, return_dict=True)\n", + "print(rf_evaluation)" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z0QYolhoZb_k" + }, + "source": [ + "In the final example, train and plot a CART model." ] + }, + { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Tc7KtjMlryn_" + "id": "027bGnCork_W" }, "outputs": [], "source": [ - "rf_model.fit(train_dataset)\n", - "rf_model.evaluate(eval_dataset, return_dict=True)" + "# Train a CART model\n", + "cart_model = tfdf.keras.CartModel()\n", + "cart_model.fit(train_dataset)\n", + "\n", + "# Plot the CART model\n", + "tfdf.model_plotter.plot_model_in_colab(cart_model, max_depth=2)" ] } ], diff --git a/site/en/guide/migrate/checkpoint_saver.ipynb b/site/en/guide/migrate/checkpoint_saver.ipynb index fca239a8497..7e7f35f4c4e 100644 --- a/site/en/guide/migrate/checkpoint_saver.ipynb +++ b/site/en/guide/migrate/checkpoint_saver.ipynb @@ -77,9 +77,9 @@ "- Save continually at a certain frequency (using the `save_freq` argument).\n", "- Save the weights/parameters only instead of the whole model by setting `save_weights_only` to `True`.\n", "\n", - "For more details, refer to the `tf.keras.callbacks.ModelCheckpoint` API docs and the *Save checkpoints during training* section in the [Save and load models](../../tutorials/keras/save_and_load.ipynb) tutorial. Learn more about the Checkpoint format in the *TF Checkpoint format* section in the [Save and load Keras models](../../guide/keras/save_and_serialize.ipynb) guide. In addition, to add fault tolerance, you can use `tf.keras.callbacks.experimental.BackupAndRestore` or `tf.train.Checkpoint` for manual checkpointing. 
Learn more in the [Fault tolerance migration guide](fault_tolerance.ipynb).\n", + "For more details, refer to the `tf.keras.callbacks.ModelCheckpoint` API docs and the *Save checkpoints during training* section in the [Save and load models](../../tutorials/keras/save_and_load.ipynb) tutorial. Learn more about the Checkpoint format in the *TF Checkpoint format* section in the [Save and load Keras models](https://www.tensorflow.org/guide/keras/save_and_serialize) guide. In addition, to add fault tolerance, you can use `tf.keras.callbacks.BackupAndRestore` or `tf.train.Checkpoint` for manual checkpointing. Learn more in the [Fault tolerance migration guide](fault_tolerance.ipynb).\n", "\n", - "Keras [callbacks](../../guide/keras/custom_callback.ipynb) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. Learn more in the _Next steps_ section at the end of the guide." + "Keras [callbacks](https://www.tensorflow.org/guide/keras/custom_callback) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. Learn more in the _Next steps_ section at the end of the guide." ] }, { @@ -201,7 +201,7 @@ "source": [ "## TensorFlow 2: Save checkpoints with a Keras callback for Model.fit\n", "\n", - "In TensorFlow 2, when you use the built-in Keras `Model.fit` (or `Model.evaluate`) for training/evaluation, you can configure `tf.keras.callbacks.ModelCheckpoint` and then pass it to the `callbacks` parameter of `Model.fit` (or `Model.evaluate`). (Learn more in the API docs and the *Using callbacks* section in the [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) guide.)\n", + "In TensorFlow 2, when you use the built-in Keras `Model.fit` (or `Model.evaluate`) for training/evaluation, you can configure `tf.keras.callbacks.ModelCheckpoint` and then pass it to the `callbacks` parameter of `Model.fit` (or `Model.evaluate`). 
(Learn more in the API docs and the *Using callbacks* section in the [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) guide.)\n", "\n", "In the example below, you will use a `tf.keras.callbacks.ModelCheckpoint` callback to store checkpoints in a temporary directory:" ] @@ -263,17 +263,17 @@ "\n", "- API docs: `tf.keras.callbacks.ModelCheckpoint`\n", "- Tutorial: [Save and load models](../../tutorials/keras/save_and_load.ipynb) (the *Save checkpoints during training* section)\n", - "- Guide: [Save and load Keras models](../../guide/keras/save_and_serialize.ipynb) (the *TF Checkpoint format* section)\n", + "- Guide: [Save and load Keras models](https://www.tensorflow.org/guide/keras/save_and_serialize) (the *TF Checkpoint format* section)\n", "\n", "Learn more about callbacks in:\n", "\n", "- API docs: `tf.keras.callbacks.Callback`\n", - "- Guide: [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/)\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section)\n", + "- Guide: [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section)\n", "\n", "You may also find the following migration-related resources useful:\n", "\n", - "- The [Fault tolerance migration guide](fault_tolerance.ipynb): `tf.keras.callbacks.experimental.BackupAndRestore` for `Model.fit`, or `tf.train.Checkpoint` and `tf.train.CheckpointManager` APIs for a custom training loop\n", + "- The [Fault tolerance migration guide](fault_tolerance.ipynb): `tf.keras.callbacks.BackupAndRestore` for `Model.fit`, or `tf.train.Checkpoint` and `tf.train.CheckpointManager` APIs for a custom training loop\n", "- The [Early stopping migration guide](early_stopping.ipynb): `tf.keras.callbacks.EarlyStopping` is a built-in early stopping callback\n", "- The [TensorBoard migration guide](tensorboard.ipynb): TensorBoard enables tracking and displaying metrics\n", "- The [LoggingTensorHook and StopAtStepHook to Keras callbacks migration guide](logging_stop_hook.ipynb)\n", diff --git a/site/en/guide/migrate/early_stopping.ipynb b/site/en/guide/migrate/early_stopping.ipynb index fec8144348a..1c1712e975b 100644 --- a/site/en/guide/migrate/early_stopping.ipynb +++ b/site/en/guide/migrate/early_stopping.ipynb @@ -457,7 +457,7 @@ "epochs = 100\n", "patience = 5\n", "wait = 0\n", - "best = 0\n", + "best = float('inf')\n", "\n", "for epoch in range(epochs):\n", " print(\"\\nStart of epoch %d\" % (epoch,))\n", @@ -486,7 +486,7 @@ " # The early stopping strategy: stop the training if `val_loss` does not\n", " # decrease over a certain number of epochs.\n", " wait += 1\n", - " if val_loss > best:\n", + " if val_loss < best:\n", " best = val_loss\n", " wait = 0\n", " if wait >= patience:\n", diff --git a/site/en/guide/migrate/evaluator.ipynb b/site/en/guide/migrate/evaluator.ipynb index 305fdef8e37..c8f848e4406 100644 --- a/site/en/guide/migrate/evaluator.ipynb +++ b/site/en/guide/migrate/evaluator.ipynb @@ -13,6 +13,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "cellView": "form", "id": "HMUDt0CiUJk9" }, "outputs": [], @@ -67,7 +68,7 @@ "source": [ "Evaluation is a critical part of measuring and benchmarking models.\n", "\n", "This guide demonstrates how to migrate evaluator tasks from 
TensorFlow 1 to TensorFlow 2. In Tensorflow 1 this functionality is implemented by `tf.estimator.train_and_evaluate`, when the API is running distributedly. In Tensorflow 2, you can use the built-in `tf.keras.experimental.SidecarEvaluator`, or a custom evaluation loop on the evaluator task.\n", + "This guide demonstrates how to migrate evaluator tasks from TensorFlow 1 to TensorFlow 2. In Tensorflow 1 this functionality is implemented by `tf.estimator.train_and_evaluate`, when the API is running distributedly. In Tensorflow 2, you can use the built-in `tf.keras.utils.SidecarEvaluator`, or a custom evaluation loop on the evaluator task.\n", "\n", "There are simple serial evaluation options in both TensorFlow 1 (`tf.estimator.Estimator.evaluate`) and TensorFlow 2 (`Model.fit(..., validation_data=(...))` or `Model.evaluate`). The evaluator task is preferable when you would like your workers not switching between training and evaluation, and built-in evaluation in `Model.fit` is preferable when you would like your evaluation to be distributed.\n" ] @@ -121,7 +122,7 @@ "\n", "In TensorFlow 1, you can configure a `tf.estimator` to evaluate the estimator using `tf.estimator.train_and_evaluate`.\n", "\n", - "In this example, start by defining the `tf.estimator.Estimator` and speciyfing training and evaluation specifications:" + "In this example, start by defining the `tf.estimator.Estimator` and specifying training and evaluation specifications:" ] }, { @@ -193,7 +194,7 @@ "source": [ "## TensorFlow 2: Evaluating a Keras model\n", "\n", - "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can evaluate the model with `tf.keras.experimental.SidecarEvaluator`. You can also visualize the evaluation metrics in Tensorboard which is not shown in this guide.\n", + "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can evaluate the model with `tf.keras.utils.SidecarEvaluator`. You can also visualize the evaluation metrics in TensorBoard which is not shown in this guide.\n", "\n", "To help demonstrate this, let's first start by defining and training the model:\n" ] @@ -240,7 +241,7 @@ "id": "AhU3VTYZoDh-" }, "source": [ - "Then, evaluate the model using `tf.keras.experimental.SidecarEvaluator`. In real training, it's recommended to use a separate job to conduct the evaluation to free up worker resources for training." + "Then, evaluate the model using `tf.keras.utils.SidecarEvaluator`. In real training, it's recommended to use a separate job to conduct the evaluation to free up worker resources for training." ] }, { @@ -254,7 +255,7 @@ "data = tf.data.Dataset.from_tensor_slices((x_test, y_test))\n", "data = data.batch(64)\n", "\n", - "tf.keras.experimental.SidecarEvaluator(\n", + "tf.keras.utils.SidecarEvaluator(\n", " model=model,\n", " data=data,\n", " checkpoint_dir=log_dir,\n", @@ -270,7 +271,7 @@ "source": [ "## Next steps\n", "\n", - "- To learn more about sidecar evaluation consider reading the `tf.keras.experimental.SidecarEvaluator` API docs.\n", + "- To learn more about sidecar evaluation consider reading the `tf.keras.utils.SidecarEvaluator` API docs.\n", "- To consider alternating training and evaluation in Keras consider reading about [other built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate)." 
] } @@ -279,7 +280,6 @@ "colab": { "collapsed_sections": [], "name": "evaluator.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/fault_tolerance.ipynb b/site/en/guide/migrate/fault_tolerance.ipynb index b9c2ed52ae2..fdbd0b972c3 100644 --- a/site/en/guide/migrate/fault_tolerance.ipynb +++ b/site/en/guide/migrate/fault_tolerance.ipynb @@ -13,6 +13,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "cellView": "form", "id": "HMUDt0CiUJk9" }, "outputs": [], @@ -69,7 +70,7 @@ "\n", "This guide first demonstrates how to add fault tolerance to training with `tf.estimator.Estimator` in TensorFlow 1 by specifying metric saving with `tf.estimator.RunConfig`. Then, you will learn how to implement fault tolerance for training in Tensorflow 2 in two ways:\n", "\n", - "- If you use the Keras `Model.fit` API, you can pass the `tf.keras.callbacks.experimental.BackupAndRestore` callback to it.\n", + "- If you use the Keras `Model.fit` API, you can pass the `tf.keras.callbacks.BackupAndRestore` callback to it.\n", "- If you use a custom training loop (with `tf.GradientTape`), you can arbitrarily save checkpoints using the `tf.train.Checkpoint` and `tf.train.CheckpointManager` APIs.\n", "\n", "Both of these methods will back up and restore the training states in [checkpoint](../../guide/checkpoint.ipynb) files.\n" @@ -84,6 +85,26 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "TOVQubuDzdmA" + }, + "source": [ + "Install `tf-nightly`, as the frequency of checkpoint saving at a particular step with the `save_freq` argument in `tf.keras.callbacks.BackupAndRestore` is introduced from TensorFlow 2.10:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pGW0XhXkxY_q" + }, + "outputs": [], + "source": [ + "!pip install tf-nightly" + ] + }, { "cell_type": "code", "execution_count": null, @@ -119,7 +140,7 @@ "id": "TtlucRG_Uro_" }, "source": [ - "## TensorFlow 1: Save checkpoints with tf.estimator.RunConfig\n", + "## TensorFlow 1: Save checkpoints with `tf.estimator.RunConfig`\n", "\n", "In TensorFlow 1, you can configure a `tf.estimator` to save checkpoints every step by configuring `tf.estimator.RunConfig`.\n", "\n", @@ -250,11 +271,11 @@ "id": "T5LtVtmvYx7J" }, "source": [ - "## TensorFlow 2: Back up and restore with a callback and Model.fit\n", + "## TensorFlow 2: Back up and restore with a callback and `Model.fit`\n", "\n", - "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can provide the `tf.keras.callbacks.experimental.BackupAndRestore` callback to add the fault tolerance functionality.\n", + "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can provide the `tf.keras.callbacks.BackupAndRestore` callback to add the fault tolerance functionality.\n", "\n", - "To help demonstrate this, let's first start by defining a callback class that artificially throws an error during the fifth checkpoint:\n" + "To help demonstrate this, first start by defining a Keras `Callback` class that artificially throws an error during the fourth epoch checkpoint:\n" ] }, { @@ -265,10 +286,13 @@ }, "outputs": [], "source": [ - "class InterruptingCallback(tf.keras.callbacks.Callback):\n", + "class InterruptAtEpoch(tf.keras.callbacks.Callback):\n", " # A callback for artificially interrupting training.\n", + " def __init__(self, interrupting_epoch=3):\n", + " self.interrupting_epoch = interrupting_epoch\n", + "\n", " def on_epoch_end(self, epoch, 
log=None):\n", - " if epoch == 4:\n", + " if epoch == self.interrupting_epoch:\n", " raise RuntimeError('Interruption')" ] }, @@ -278,7 +302,7 @@ "id": "AhU3VTYZoDh-" }, "source": [ - "Then, define and instantiate a simple Keras model, define the loss function, call `Model.compile`, and set up a `tf.keras.callbacks.experimental.BackupAndRestore` callback that will save the checkpoints in a temporary directory:" + "Then, define and instantiate a simple Keras model, define the loss function, call `Model.compile`, and set up a `tf.keras.callbacks.BackupAndRestore` callback that will save the checkpoints in a temporary directory at epoch boundaries:" ] }, { @@ -296,20 +320,14 @@ " tf.keras.layers.Dropout(0.2),\n", " tf.keras.layers.Dense(10)\n", " ])\n", - "\n", "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", "model = create_model()\n", "model.compile(optimizer='adam',\n", " loss=loss,\n", - " metrics=['accuracy'],\n", - " steps_per_execution=10)\n", - "\n", + " metrics=['accuracy'])\n", "log_dir = tempfile.mkdtemp()\n", - "\n", - "backup_restore_callback = tf.keras.callbacks.experimental.BackupAndRestore(\n", - " backup_dir = log_dir\n", - ")" + "backup_restore_callback = tf.keras.callbacks.BackupAndRestore(\n", + " backup_dir = log_dir)" ] }, { @@ -318,7 +336,7 @@ "id": "LRRWmZqsvMrq" }, "source": [ - "Now, start training the model with `Model.fit`. During training, checkpoints will be saved thanks to the `backup_restore_callback` defined above, while the `InterruptingCallback` will raise an artificial exception to simulate a failure." + "Start training the model with `Model.fit`. During training, checkpoints will be saved thanks to `tf.keras.callbacks.BackupAndRestore` instantiated above, while the `InterruptAtEpoch` class will raise an artificial exception to simulate a failure after the fourth epoch." 
] }, { @@ -333,8 +351,9 @@ " model.fit(x=x_train,\n", " y=y_train,\n", " epochs=10,\n", + " steps_per_epoch=100,\n", " validation_data=(x_test, y_test),\n", - " callbacks=[backup_restore_callback, InterruptingCallback()])\n", + " callbacks=[backup_restore_callback, InterruptAtEpoch()])\n", "except Exception as e:\n", " print(f'{type(e).__name__}:{e}')" ] @@ -364,6 +383,108 @@ "model.fit(x=x_train,\n", " y=y_train,\n", " epochs=10,\n", + " steps_per_epoch=100,\n", + " validation_data=(x_test, y_test),\n", + " callbacks=[backup_restore_callback])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nP2dnpMPxtYj" + }, + "source": [ + "Define another `Callback` class that artificially throws an error during the 140th step:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YardkAaBxr-c" + }, + "outputs": [], + "source": [ + "class InterruptAtStep(tf.keras.callbacks.Callback):\n", + " # A callback for artificially interrupting training.\n", + " def __init__(self, interrupting_step=140):\n", + " self.total_step_count = 0\n", + " self.interrupting_step = interrupting_step\n", + "\n", + " def on_batch_begin(self, batch, logs=None):\n", + " self.total_step_count += 1\n", + "\n", + " def on_batch_end(self, batch, logs=None):\n", + " if self.total_step_count == self.interrupting_step:\n", + " print(\"\\nInterrupting at step count\", self.total_step_count)\n", + " raise RuntimeError('Interruption')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Af3VpehxyTpb" + }, + "source": [ + "Note: This section uses features that are only available in `tf-nightly` until Tensorflow 2.10 is released.\n", + "\n", + "To make sure the checkpoints are saved every 30 steps, set the `save_freq` in the `BackupAndRestore` callback to `30`. The `InterruptAtStep` will raise an artificial exception to simulate a failure at epoch 1 and step 40 (total step count 140). The checkpoint would be last saved at epoch 1 and step 20." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dHHCENDPyUHS" + }, + "outputs": [], + "source": [ + "log_dir_2 = tempfile.mkdtemp()\n", + "\n", + "backup_restore_callback = tf.keras.callbacks.BackupAndRestore(\n", + " backup_dir = log_dir_2, save_freq=30\n", + ")\n", + "model = create_model()\n", + "model.compile(optimizer='adam',\n", + " loss=loss,\n", + " metrics=['accuracy'])\n", + "try:\n", + " model.fit(x=x_train,\n", + " y=y_train,\n", + " epochs=10,\n", + " steps_per_epoch=100,\n", + " validation_data=(x_test, y_test),\n", + " callbacks=[backup_restore_callback, InterruptAtStep()])\n", + "except Exception as e:\n", + " print(f'{type(e).__name__}:{e}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2-ggMFEHynMR" + }, + "source": [ + "Next, instantiate the Keras model, call `Model.compile`, and continue training the model with `Model.fit` from a previously saved checkpoint. Notice that the training starts from epoch 2 and step 21." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vT7Kx30NEqly" + }, + "outputs": [], + "source": [ + "model = create_model()\n", + "model.compile(optimizer='adam',\n", + " loss=loss,\n", + " metrics=['accuracy'],\n", + " steps_per_execution=10)\n", + "model.fit(x=x_train,\n", + " y=y_train,\n", + " epochs=10,\n", + " steps_per_epoch=100,\n", " validation_data=(x_test, y_test),\n", " callbacks=[backup_restore_callback])" ] @@ -452,7 +573,7 @@ "\n", "To learn more about fault tolerance and checkpointing in TensorFlow 2, consider the following documentation:\n", "\n", - "- The `tf.keras.callbacks.experimental.BackupAndRestore` callback API docs.\n", + "- The `tf.keras.callbacks.BackupAndRestore` callback API docs.\n", "- The `tf.train.Checkpoint` and `tf.train.CheckpointManager` API docs.\n", "- The [Training checkpoints](../../guide/checkpoint.ipynb) guide, including the _Writing checkpoints_ section.\n", "\n", @@ -467,7 +588,6 @@ "colab": { "collapsed_sections": [], "name": "fault_tolerance.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/images/tensorboard_TF1.png b/site/en/guide/migrate/images/tensorboard_TF1.png index 18b7bbb12cf..294fbbcc5b5 100644 Binary files a/site/en/guide/migrate/images/tensorboard_TF1.png and b/site/en/guide/migrate/images/tensorboard_TF1.png differ diff --git a/site/en/guide/migrate/images/tensorboard_TF2.png b/site/en/guide/migrate/images/tensorboard_TF2.png index 55abb91fe6f..bbad8768210 100644 Binary files a/site/en/guide/migrate/images/tensorboard_TF2.png and b/site/en/guide/migrate/images/tensorboard_TF2.png differ diff --git a/site/en/guide/migrate/logging_stop_hook.ipynb b/site/en/guide/migrate/logging_stop_hook.ipynb index 3dbcd35e293..a1d60243c37 100644 --- a/site/en/guide/migrate/logging_stop_hook.ipynb +++ b/site/en/guide/migrate/logging_stop_hook.ipynb @@ -69,7 +69,7 @@ "source": [ "In TensorFlow 1, you use `tf.estimator.LoggingTensorHook` to monitor and log tensors, while `tf.estimator.StopAtStepHook` helps stop training at a specified step when training with `tf.estimator.Estimator`. This notebook demonstrates how to migrate from these APIs to their equivalents in TensorFlow 2 using custom Keras callbacks (`tf.keras.callbacks.Callback`) with `Model.fit`.\n", "\n", - "Keras [callbacks](../../guide/keras/custom_callback.ipynb) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/) and [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section) guides. For migrating from `SessionRunHook` in TensorFlow 1 to Keras callbacks in TensorFlow 2, check out the [Migrate training with assisted logic](sessionrunhook_callback.ipynb) guide." + "Keras [callbacks](https://www.tensorflow.org/guide/keras/custom_callback) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. 
You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback) and [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section) guides. For migrating from `SessionRunHook` in TensorFlow 1 to Keras callbacks in TensorFlow 2, check out the [Migrate training with assisted logic](sessionrunhook_callback.ipynb) guide." ] }, { @@ -277,7 +277,7 @@ "\n", "- API docs: `tf.keras.callbacks.Callback`\n", "- Guide: [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/)\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section)\n", "\n", "You may also find the following migration-related resources useful:\n", "\n", diff --git a/site/en/guide/migrate/metrics_optimizers.ipynb b/site/en/guide/migrate/metrics_optimizers.ipynb index a720b3c8e9a..61afb35aea6 100644 --- a/site/en/guide/migrate/metrics_optimizers.ipynb +++ b/site/en/guide/migrate/metrics_optimizers.ipynb @@ -144,7 +144,7 @@ "\n", "def _model_fn(features, labels, mode):\n", " logits = tf1.layers.Dense(2)(features)\n", - " predictions = tf.argmax(input=logits, axis=1)\n", + " predictions = tf.math.argmax(input=logits, axis=1)\n", " loss = tf1.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)\n", " optimizer = tf1.train.AdagradOptimizer(0.05)\n", " train_op = optimizer.minimize(loss, global_step=tf1.train.get_global_step())\n", @@ -223,7 +223,7 @@ "\n", "inputs = tf.keras.Input((2,))\n", "logits = tf.keras.layers.Dense(2)(inputs)\n", - "predictions = tf.argmax(input=logits, axis=1)\n", + "predictions = tf.math.argmax(input=logits, axis=1)\n", "model = tf.keras.models.Model(inputs, predictions)\n", "optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", "\n", @@ -283,7 +283,7 @@ "id": "E3F3ElcyadW-" }, "source": [ - "For more details about `tf.keras.metrics.Metric`, please take a look for the API documentation at `tf.keras.metrics.Metric`, as well as the [migration guide](https://www.tensorflow.org/guide/migrate#new-style_metrics_and_losses)." + "For more details, refer to the API documentation for `tf.keras.metrics.Metric`, as well as the [migration guide](https://www.tensorflow.org/guide/effective_tf2#new-style_metrics_and_losses)." ] }, { @@ -370,8 +370,7 @@ "metadata": { "colab": { "collapsed_sections": [], - "name": "metrics.ipynb", - "provenance": [], + "name": "metrics_optimizers.ipynb", "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/migrating_checkpoints.ipynb b/site/en/guide/migrate/migrating_checkpoints.ipynb index 38569cc7615..a63789037ff 100644 --- a/site/en/guide/migrate/migrating_checkpoints.ipynb +++ b/site/en/guide/migrate/migrating_checkpoints.ipynb @@ -834,7 +834,7 @@ "\n", "**TF2 checkpoints work with Keras's `build()` step**\n", "\n", - "`tf.train.Checkpoint.restore` has a mechanism called *delayed restoration* which\n", + "`tf.train.Checkpoint.restore` has a mechanism called *deferred restoration* which\n", "allows `tf.Module` and Keras objects to store variable values if the variable has not yet been created. 
This allows *initialized* models to load weights and *build* after.\n", "\n", "```\n", diff --git a/site/en/guide/migrate/migrating_estimator.ipynb b/site/en/guide/migrate/migrating_estimator.ipynb index ffd8c9ce153..4d3259babb8 100644 --- a/site/en/guide/migrate/migrating_estimator.ipynb +++ b/site/en/guide/migrate/migrating_estimator.ipynb @@ -70,7 +70,7 @@ "This guide demonstrates how to migrate from TensorFlow 1's `tf.estimator.Estimator` APIs to TensorFlow 2's `tf.keras` APIs. First, you will set up and run a basic model for training and evaluation with `tf.estimator.Estimator`. Then, you will perform the equivalent steps in TensorFlow 2 with the `tf.keras` APIs. You will also learn how to customize the training step by subclassing `tf.keras.Model` and using `tf.GradientTape`.\n", "\n", "- In TensorFlow 1, the high-level `tf.estimator.Estimator` APIs let you train and evaluate a model, as well as perform inference and save your model (for serving).\n", - "- In TensorFlow 2, use the Keras APIs to perform the aforementioned tasks, such as [model building](../../guide/keras/custom_layers_and_models.ipynb), gradient application, [training](../../guide/keras/customizing_what_happens_in_fit.ipynb), evaluation, and prediction.\n", + "- In TensorFlow 2, use the Keras APIs to perform the aforementioned tasks, such as [model building](https://www.tensorflow.org/guide/keras/custom_layers_and_models), gradient application, [training](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit), evaluation, and prediction.\n", "\n", "(For migrating model/checkpoint saving workflows to TensorFlow 2, check out the [SavedModel](saved_model.ipynb) and [Checkpoint](checkpoint_saved.ipynb) migration guides.)" ] @@ -197,10 +197,10 @@ "source": [ "## TensorFlow 2: Train and evaluate with the built-in Keras methods\n", "\n", - "This example demonstrates how to perform training and evaluation with Keras `Model.fit` and `Model.evaluate` in TensorFlow 2. (You can learn more in the [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) guide.)\n", + "This example demonstrates how to perform training and evaluation with Keras `Model.fit` and `Model.evaluate` in TensorFlow 2. (You can learn more in the [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) guide.)\n", "\n", "- Start by preparing the dataset pipeline with the `tf.data.Dataset` APIs.\n", - "- Define a simple Keras [Sequential](../../guide/keras/sequential_model.ipynb) model with one linear (`tf.keras.layers.Dense`) layer.\n", + "- Define a simple Keras [Sequential](https://www.tensorflow.org/guide/keras/sequential_model) model with one linear (`tf.keras.layers.Dense`) layer.\n", "- Instantiate an Adagrad optimizer (`tf.keras.optimizers.Adagrad`).\n", "- Configure the model for training by passing the `optimizer` variable and the mean-squared error (`\"mse\"`) loss to `Model.compile`." ] @@ -278,9 +278,9 @@ "id": "gHx_RUL8xcJ3" }, "source": [ - "In TensorFlow 2, you can also write your own custom training step function with `tf.GradientTape` to perform forward and backward passes, while still taking advantage of the built-in training support, such as `tf.keras.callbacks.Callback` and `tf.distribute.Strategy`. 
(Learn more in [Customizing what happens in Model.fit](../../guide/keras/customizing_what_happens_in_fit.ipynb) and [Writing custom training loops from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb).)\n", + "In TensorFlow 2, you can also write your own custom training step function with `tf.GradientTape` to perform forward and backward passes, while still taking advantage of the built-in training support, such as `tf.keras.callbacks.Callback` and `tf.distribute.Strategy`. (Learn more in [Customizing what happens in Model.fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) and [Writing custom training loops from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).)\n", "\n", - "In this example, start by creating a custom `tf.keras.Model` by subclassing `tf.keras.Sequential` that overrides `Model.train_step`. (Learn more about [subclassing tf.keras.Model](../../keras/custom_layers_and_models.ipynb)). Inside that class, define a custom `train_step` function that for each batch of data performs a forward pass and backward pass during one training step.\n" + "In this example, start by creating a custom `tf.keras.Model` by subclassing `tf.keras.Sequential` that overrides `Model.train_step`. (Learn more about [subclassing tf.keras.Model](https://www.tensorflow.org/guide/keras/custom_layers_and_models)). Inside that class, define a custom `train_step` function that for each batch of data performs a forward pass and backward pass during one training step.\n" ] }, { @@ -394,10 +394,10 @@ "\n", "Additional Keras resources you may find useful:\n", "\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb)\n", - "- Guide: [Customize what happens in Model.fit](../../guide/keras/customizing_what_happens_in_fit.ipynb)\n", - "- Guide: [Writing a training loop from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb)\n", - "- Guide: [Making new Keras layers and models via subclassing](../../guide/keras/custom_layers_and_models.ipynb)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate)\n", + "- Guide: [Customize what happens in Model.fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit)\n", + "- Guide: [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch)\n", + "- Guide: [Making new Keras layers and models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models)\n", "\n", "The following guides can assist with migrating distribution strategy workflows from `tf.estimator` APIs:\n", "\n", diff --git a/site/en/guide/migrate/migrating_feature_columns.ipynb b/site/en/guide/migrate/migrating_feature_columns.ipynb index 0e08fd431ea..b2dbc5fe7c0 100644 --- a/site/en/guide/migrate/migrating_feature_columns.ipynb +++ b/site/en/guide/migrate/migrating_feature_columns.ipynb @@ -37,7 +37,7 @@ "id": "77z2OchJTk0l" }, "source": [ - "# Migrating feature_columns to TF2's Keras Preprocessing Layers\n", + "# Migrate `tf.feature_column`s to Keras preprocessing layers\n", "\n", "\n", " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", @@ -67,11 +67,11 @@ "id": "-5jGPDA2PDPI" }, "source": [ - "Training a model will usually come with some amount of feature preprocessing, particularly when dealing with structured data. When training a `tf.estimator.Estimator` in TF1, this feature preprocessing is usually done with the `tf.feature_column` API. In TF2, this preprocessing can be done directly with Keras layers, called _preprocessing layers_.\n", + "Training a model usually comes with some amount of feature preprocessing, particularly when dealing with structured data. When training a `tf.estimator.Estimator` in TensorFlow 1, you usually perform feature preprocessing with the `tf.feature_column` API. In TensorFlow 2, you can do this directly with Keras preprocessing layers.\n", "\n", - "In this migration guide, you will perform some common feature transformations using both feature columns and preprocessing layers, followed by training a complete model with both APIs.\n", + "This migration guide demonstrates common feature transformations using both feature columns and preprocessing layers, followed by training a complete model with both APIs.\n", "\n", - "First, start with a couple of necessary imports," + "First, start with a couple of necessary imports:" ] }, { @@ -93,7 +93,7 @@ "id": "NVPYTQAWtDwH" }, "source": [ - "and add a utility for calling a feature column for demonstration:" + "Now, add a utility function for calling a feature column for demonstration:" ] }, { @@ -463,6 +463,124 @@ "embedding(string_lookup_layer(['small', 'medium', 'large']))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "UwqvADV6HRdC" + }, + "source": [ + "## Summing weighted categorical data\n", + "\n", + "In some cases, you need to deal with categorical data where each occurance of a category comes with an associated weight. In feature columns, this is handled with `tf.feature_column.weighted_categorical_column`. When paired with an `indicator_column`, this has the effect of summing weights per category." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "02HqjPLMRxWn" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "categorical_col = tf1.feature_column.categorical_column_with_identity(\n", + " 'ids', num_buckets=20)\n", + "weighted_categorical_col = tf1.feature_column.weighted_categorical_column(\n", + " categorical_col, 'weights')\n", + "indicator_col = tf1.feature_column.indicator_column(weighted_categorical_col)\n", + "call_feature_columns(indicator_col, {'ids': ids, 'weights': weights})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "98jaq7Q3S9aG" + }, + "source": [ + "In Keras, this can be done by passing a `count_weights` input to `tf.keras.layers.CategoryEncoding` with `output_mode='count'`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JsoYUUgRS7hu" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "# Using sparse output is more efficient when `num_tokens` is large.\n", + "count_layer = tf.keras.layers.CategoryEncoding(\n", + " num_tokens=20, output_mode='count', sparse=True)\n", + "tf.sparse.to_dense(count_layer(ids, count_weights=weights))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gBJxb6y2GasI" + }, + "source": [ + "## Embedding weighted categorical data\n", + "\n", + "You might alternately want to embed weighted categorical inputs. In feature columns, the `embedding_column` contains a `combiner` argument. If any sample\n", + "contains multiple entries for a category, they will be combined according to the argument setting (by default `'mean'`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AjOt1wgmT5mM" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "categorical_col = tf1.feature_column.categorical_column_with_identity(\n", + " 'ids', num_buckets=20)\n", + "weighted_categorical_col = tf1.feature_column.weighted_categorical_column(\n", + " categorical_col, 'weights')\n", + "embedding_col = tf1.feature_column.embedding_column(\n", + " weighted_categorical_col, 4, combiner='mean')\n", + "call_feature_columns(embedding_col, {'ids': ids, 'weights': weights})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fd6eluARXndC" + }, + "source": [ + "In Keras, there is no `combiner` option to `tf.keras.layers.Embedding`, but you can achieve the same effect with `tf.keras.layers.Dense`. The `embedding_column` above is simply linearly combining embedding vectors according to category weight. Though not obvious at first, it is exactly equivalent to representing your categorical inputs as a sparse weight vector of size `(num_tokens)`, and multiplying them by a `Dense` kernel of shape `(embedding_size, num_tokens)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y-vZvPyiYilE" + }, + "outputs": [], + "source": [ + "ids = tf.constant([[5, 11, 5, 17, 17]])\n", + "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", + "\n", + "# For `combiner='mean'`, normalize your weights to sum to 1. Removing this line\n", + "# would be equivalent to an `embedding_column` with `combiner='sum'`.\n", + "weights = weights / tf.reduce_sum(weights, axis=-1, keepdims=True)\n", + "\n", + "count_layer = tf.keras.layers.CategoryEncoding(\n", + " num_tokens=20, output_mode='count', sparse=True)\n", + "embedding_layer = tf.keras.layers.Dense(4, use_bias=False)\n", + "embedding_layer(count_layer(ids, count_weights=weights))" + ] + }, { "cell_type": "markdown", "metadata": { @@ -497,7 +615,7 @@ "id": "e_4Xx2c37lqD" }, "source": [ - "Define some common constants for both TF1 and TF2 workflows:" + "Define some common constants for both TensorFlow 1 and TensorFlow 2 workflows:" ] }, { @@ -536,17 +654,17 @@ "source": [ "categorical_col = tf1.feature_column.categorical_column_with_identity(\n", " 'type', num_buckets=one_hot_dims)\n", - "# Convert index to one-hot; e.g. 
[2] -> [0,0,1].\n", + "# Convert index to one-hot; e.g., [2] -> [0,0,1].\n", "indicator_col = tf1.feature_column.indicator_column(categorical_col)\n", "\n", - "# Convert strings to indices; e.g. ['small'] -> [1].\n", + "# Convert strings to indices; e.g., ['small'] -> [1].\n", "vocab_col = tf1.feature_column.categorical_column_with_vocabulary_list(\n", " 'size', vocabulary_list=vocab, num_oov_buckets=1)\n", "# Embed the indices.\n", "embedding_col = tf1.feature_column.embedding_column(vocab_col, embedding_dims)\n", "\n", "normalizer_fn = lambda x: (x - weight_mean) / math.sqrt(weight_variance)\n", - "# Normalize the numeric inputs; e.g. [2.0] -> [0.0].\n", + "# Normalize the numeric inputs; e.g., [2.0] -> [0.0].\n", "numeric_col = tf1.feature_column.numeric_column(\n", " 'weight', normalizer_fn=normalizer_fn)\n", "\n", @@ -609,12 +727,12 @@ " 'size': tf.keras.Input(shape=(), dtype='string'),\n", " 'weight': tf.keras.Input(shape=(), dtype='float32'),\n", "}\n", - "# Convert index to one-hot; e.g. [2] -> [0,0,1].\n", + "# Convert index to one-hot; e.g., [2] -> [0,0,1].\n", "type_output = tf.keras.layers.CategoryEncoding(\n", " one_hot_dims, output_mode='one_hot')(inputs['type'])\n", - "# Convert size strings to indices; e.g. ['small'] -> [1].\n", + "# Convert size strings to indices; e.g., ['small'] -> [1].\n", "size_output = tf.keras.layers.StringLookup(vocabulary=vocab)(inputs['size'])\n", - "# Normalize the numeric inputs; e.g. [2.0] -> [0.0].\n", + "# Normalize the numeric inputs; e.g., [2.0] -> [0.0].\n", "weight_output = tf.keras.layers.Normalization(\n", " axis=None, mean=weight_mean, variance=weight_variance)(inputs['weight'])\n", "outputs = {\n", @@ -727,8 +845,8 @@ "outputs": [], "source": [ "inputs = preprocessing_model.input\n", - "outpus = training_model(preprocessing_model(inputs))\n", - "inference_model = tf.keras.Model(inputs, outpus)\n", + "outputs = training_model(preprocessing_model(inputs))\n", + "inference_model = tf.keras.Model(inputs, outputs)\n", "\n", "predict_dataset = tf.data.Dataset.from_tensor_slices(predict_features).batch(1)\n", "inference_model.predict(predict_dataset)" @@ -740,7 +858,7 @@ "id": "O01VQIxCWBxU" }, "source": [ - "This composed model can be saved as a [SavedModel](https://www.tensorflow.org/guide/saved_model) for later use." + "This composed model can be saved as a `.keras` file for later use." ] }, { @@ -751,8 +869,8 @@ }, "outputs": [], "source": [ - "inference_model.save('model')\n", - "restored_model = tf.keras.models.load_model('model')\n", + "inference_model.save('model.keras')\n", + "restored_model = tf.keras.models.load_model('model.keras')\n", "restored_model.predict(predict_dataset)" ] }, @@ -762,7 +880,7 @@ "id": "IXMBwzggwUjI" }, "source": [ - "Note: Preprocessing layers are not trainable, which allows you to apply them *asynchronously* using `tf.data`. This has performence benefits, as you can both [prefetch](https://www.tensorflow.org/guide/data_performance#prefetching) preprocessed batches, and free up any accelerators to focus on the differentiable parts of a model. As this guide shows, seperating preprocessing during training and composing it during inference is a flexible way to leverage these performance gains. However, if your model is small or preprocessing time is negligable, it may be simpler to build preprocessing into a complete model from the start. To do this you can build a single model starting with `tf.keras.Input`, followed by preprocessing layers, followed by trainable layers." 
+ "Note: Preprocessing layers are not trainable, which allows you to apply them *asynchronously* using `tf.data`. This has performance benefits, as you can both prefetch preprocessed batches, and free up any accelerators to focus on the differentiable parts of a model (learn more in the _Prefetching_ section of the [Better performance with the `tf.data` API](../data_performance.ipynb) guide). As this guide shows, separating preprocessing during training and composing it during inference is a flexible way to leverage these performance gains. However, if your model is small or preprocessing time is negligible, it may be simpler to build preprocessing into a complete model from the start. To do this you can build a single model starting with `tf.keras.Input`, followed by preprocessing layers, followed by trainable layers." ] }, { @@ -774,76 +892,78 @@ "## Feature column equivalence table\n", "\n", "For reference, here is an approximate correspondence between feature columns and\n", - "preprocessing layers:\n", + "Keras preprocessing layers:
\n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", "
Feature ColumnKeras LayerFeature columnKeras layer
`feature_column.bucketized_column``layers.Discretization``tf.feature_column.bucketized_column``tf.keras.layers.Discretization`
`feature_column.categorical_column_with_hash_bucket``layers.Hashing``tf.feature_column.categorical_column_with_hash_bucket``tf.keras.layers.Hashing`
`feature_column.categorical_column_with_identity``layers.CategoryEncoding``tf.feature_column.categorical_column_with_identity``tf.keras.layers.CategoryEncoding`
`feature_column.categorical_column_with_vocabulary_file``layers.StringLookup` or `layers.IntegerLookup``tf.feature_column.categorical_column_with_vocabulary_file``tf.keras.layers.StringLookup` or `tf.keras.layers.IntegerLookup`
`feature_column.categorical_column_with_vocabulary_list``layers.StringLookup` or `layers.IntegerLookup``tf.feature_column.categorical_column_with_vocabulary_list``tf.keras.layers.StringLookup` or `tf.keras.layers.IntegerLookup`
`feature_column.crossed_column`Not implemented.`tf.feature_column.crossed_column``tf.keras.layers.experimental.preprocessing.HashedCrossing`
`feature_column.embedding_column``layers.Embedding``tf.feature_column.embedding_column``tf.keras.layers.Embedding`
`feature_column.indicator_column``tf.feature_column.indicator_column``output_mode='one_hot'` or `output_mode='multi_hot'`*
`feature_column.numeric_column``layers.Normalization``tf.feature_column.numeric_column``tf.keras.layers.Normalization`
`feature_column.sequence_categorical_column_with_hash_bucket``layers.Hashing``tf.feature_column.sequence_categorical_column_with_hash_bucket``tf.keras.layers.Hashing`
`feature_column.sequence_categorical_column_with_identity``layers.CategoryEncoding``tf.feature_column.sequence_categorical_column_with_identity``tf.keras.layers.CategoryEncoding`
`feature_column.sequence_categorical_column_with_vocabulary_file``layers.StringLookup`, `layers.IntegerLookup`, or `layer.TextVectorization`†`tf.feature_column.sequence_categorical_column_with_vocabulary_file``tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, or `tf.keras.layer.TextVectorization`†
`feature_column.sequence_categorical_column_with_vocabulary_list``layers.StringLookup`, `layers.IntegerLookup`, or `layer.TextVectorization`†`tf.feature_column.sequence_categorical_column_with_vocabulary_list``tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, or `tf.keras.layer.TextVectorization`†
`feature_column.sequence_numeric_column``layers.Normalization``tf.feature_column.sequence_numeric_column``tf.keras.layers.Normalization`
`feature_column.weighted_categorical_column``layers.CategoryEncoding``tf.feature_column.weighted_categorical_column``tf.keras.layers.CategoryEncoding`
\n", "\n", - "\\* `output_mode` can be passed to `layers.CategoryEncoding`, `layers.StringLookup`, `layers.IntegerLookup`, and `layers.TextVectorization`.\n", + "\\* The `output_mode` can be passed to `tf.keras.layers.CategoryEncoding`, `tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, and `tf.keras.layers.TextVectorization`.\n", + "\n", + "† `tf.keras.layers.TextVectorization` can handle freeform text input directly (for example, entire sentences or paragraphs). This is not one-to-one replacement for categorical sequence handling in TensorFlow 1, but may offer a convenient replacement for ad-hoc text preprocessing.\n", "\n", - "† `layers.TextVectorization` can handle freeform text input directly (e.g. entire sentences or paragraphs). This is not one-to-one replacement for categorical sequence handling in TF1, but may offer a convinient replacement for ad-hoc text preprocessing." + "Note: Linear estimators, such as `tf.estimator.LinearClassifier`, can handle direct categorical input (integer indices) without an `embedding_column` or `indicator_column`. However, integer indices cannot be passed directly to `tf.keras.layers.Dense` or `tf.keras.experimental.LinearModel`. These inputs should be first encoded with `tf.layers.CategoryEncoding` with `output_mode='count'` (and `sparse=True` if the category sizes are large) before calling into `Dense` or `LinearModel`." ] }, { @@ -852,10 +972,10 @@ "id": "AQCJ6lM3YDq_" }, "source": [ - "## Next Steps\n", + "## Next steps\n", "\n", - " - For more information on keras preprocessing layers, see [the guide to preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers).\n", - " - For a more in-depth example of applying preprocessing layers to structured data, see [the structured data tutorial](https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers)." + " - For more information on Keras preprocessing layers, go to the [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) guide.\n", + " - For a more in-depth example of applying preprocessing layers to structured data, refer to the [Classify structured data using Keras preprocessing layers](../../tutorials/structured_data/preprocessing_layers.ipynb) tutorial." ] } ], @@ -863,7 +983,6 @@ "colab": { "collapsed_sections": [], "name": "migrating_feature_columns.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/migration_debugging.ipynb b/site/en/guide/migrate/migration_debugging.ipynb new file mode 100644 index 00000000000..25cb7f9065f --- /dev/null +++ b/site/en/guide/migrate/migration_debugging.ipynb @@ -0,0 +1,799 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FEL3NlTTDlSX" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "FlUw7tSKbtg4" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "77z2OchJTk0l" + }, + "source": [ + "# Debug a TensorFlow 2 migrated training pipeline\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " View on TensorFlow.org\n", + " \n", + " \n", + " \n", + " Run in Google Colab\n", + " \n", + " \n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zTwPu-w6M5sz" + }, + "source": [ + "This notebook demonstrates how to debug a training pipeline when migrating to TensorFlow 2 (TF2). It consists of following components:\n", + "1. Suggested steps and code samples for debugging training pipeline\n", + "2. Tools for debugging\n", + "3. Other related resources\n", + "\n", + "One assumption is you have the TensorFlow 1 (TF1.x) code and trained models for comparison, and you want to build a TF2 model that achieves similar validation accuracy.\n", + "\n", + "This notebook does **NOT** cover debugging performance issues for training/inference speed or memory usage." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fKm9R4CtOAP3" + }, + "source": [ + "## Debugging workflow\n", + "\n", + "Below is a general workflow for debugging your TF2 training pipelines. Note that you do not need to follow these steps in order. You can also use a binary search approach where you test the model in an intermediate step and narrow down the debugging scope. \n", + "\n", + "1. Fix compile and runtime errors\n", + "\n", + "2. Single forward pass validation (in a separate\n", + " [guide](./validate_correctness.ipynb))\n", + "\n", + " a. On single CPU device\n", + "\n", + " * Verify variables are created only once\n", + " * Check variable counts, names, and shapes match\n", + " * Reset all variables, check numerical equivalence with all randomness\n", + " disabled\n", + " * Align random number generation, check numerical equivalence in inference\n", + " * (Optional) Check checkpoints are loaded properly and TF1.x/TF2 models\n", + " generate identical output\n", + "\n", + " b. On single GPU/TPU device\n", + "\n", + " c. With multi-device strategies\n", + "\n", + "3. Model training numerical equivalence validation for a few steps (code\n", + " samples available below)\n", + "\n", + " a. Single training step validation using small and fixed data on single CPU\n", + " device. Specifically, check numerical equivalence for the following\n", + " components\n", + "\n", + " * losses computation\n", + " * metrics\n", + " * learning rate\n", + " * gradient computation and update\n", + "\n", + " b. Check statistics after training 3 or more steps to verify optimizer behaviors like the momentum, still with fixed data on single CPU device\n", + "\n", + " c. On single GPU/TPU device\n", + "\n", + " d. With multi-device strategies (check the intro for [MultiProcessRunner](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/distribute/multi_process_runner.py#L108) at the bottom)\n", + "\n", + "4. End-to-end convergence testing on real dataset\n", + "\n", + " a. Check training behaviors with TensorBoard\n", + "\n", + " * use simple optimizers e.g., SGD and simple distribution strategies e.g.\n", + " `tf.distribute.OneDeviceStrategy` first\n", + " * training metrics\n", + " * evaluation metrics\n", + " * figure out what the reasonable tolerance for inherent randomness is\n", + "\n", + " b. Check equivalence with advanced optimizer/learning rate\n", + " scheduler/distribution strategies\n", + "\n", + " c. Check equivalence when using mixed precision\n", + "\n", + "5. 
Additional product benchmarks" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XKakQBI9-FLb" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i1ghHyXl-Oqd" + }, + "outputs": [], + "source": [ + "# The `DeterministicRandomTestTool` is only available from Tensorflow 2.8:\n", + "!pip install -q \"tensorflow==2.9.*\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "usyRSlIRl3r2" + }, + "source": [ + "### Single forward pass validation \n", + "\n", + "Single forward pass validation, including checkpoint loading, is covered in a different [colab](./validate_correctness.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HVBQbsZeVL_V" + }, + "outputs": [], + "source": [ + "import sys\n", + "import unittest\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow.compat.v1 as v1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4M104dt7m5cC" + }, + "source": [ + "### Model training numerical equivalence validation for a few steps" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v2Nz2Ni1EkMz" + }, + "source": [ + "Set up model configuration and prepare a fake dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hUxXadzKU9rT" + }, + "outputs": [], + "source": [ + "params = {\n", + " 'input_size': 3,\n", + " 'num_classes': 3,\n", + " 'layer_1_size': 2,\n", + " 'layer_2_size': 2,\n", + " 'num_train_steps': 100,\n", + " 'init_lr': 1e-3,\n", + " 'end_lr': 0.0,\n", + " 'decay_steps': 1000,\n", + " 'lr_power': 1.0,\n", + "}\n", + "\n", + "# make a small fixed dataset\n", + "fake_x = np.ones((2, params['input_size']), dtype=np.float32)\n", + "fake_y = np.zeros((2, params['num_classes']), dtype=np.int32)\n", + "fake_y[0][0] = 1\n", + "fake_y[1][1] = 1\n", + "\n", + "step_num = 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lV_n3Ukmz4Un" + }, + "source": [ + "Define the TF1.x model." 
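Before the TF1.x model definition below, a brief aside: step 2a of the debugging workflow above calls for checking that variable counts, names, and shapes match between the two implementations. The following is a minimal sketch of that check, not part of the original notebook; `tf1_variables` and `tf2_variables` are hypothetical stand-ins for the variable lists of whichever two models you are comparing.

```python
import tensorflow as tf

def describe_variables(variables):
  # Sort by name so the listing is stable across runs.
  return sorted((v.name, tuple(v.shape.as_list())) for v in variables)

def check_variables_match(tf1_variables, tf2_variables):
  # Counts must agree.
  assert len(tf1_variables) == len(tf2_variables), 'variable counts differ'
  # Names usually differ between TF1.x and TF2, so compare shapes as a sorted
  # multiset and print the name/shape pairs for manual inspection.
  shapes_1 = sorted(tuple(v.shape.as_list()) for v in tf1_variables)
  shapes_2 = sorted(tuple(v.shape.as_list()) for v in tf2_variables)
  assert shapes_1 == shapes_2, 'variable shapes differ'
  for pair in zip(describe_variables(tf1_variables),
                  describe_variables(tf2_variables)):
    print(pair)
```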
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ATa5fzL8mAwl" + }, + "outputs": [], + "source": [ + "# Assume there is an existing TF1.x model using estimator API\n", + "# Wrap the model_fn to log necessary tensors for result comparison\n", + "class SimpleModelWrapper():\n", + " def __init__(self):\n", + " self.logged_ops = {}\n", + " self.logs = {\n", + " 'step': [],\n", + " 'lr': [],\n", + " 'loss': [],\n", + " 'grads_and_vars': [],\n", + " 'layer_out': []}\n", + " \n", + " def model_fn(self, features, labels, mode, params):\n", + " out_1 = tf.compat.v1.layers.dense(features, units=params['layer_1_size'])\n", + " out_2 = tf.compat.v1.layers.dense(out_1, units=params['layer_2_size'])\n", + " logits = tf.compat.v1.layers.dense(out_2, units=params['num_classes'])\n", + " loss = tf.compat.v1.losses.softmax_cross_entropy(labels, logits)\n", + "\n", + " # skip EstimatorSpec details for prediction and evaluation \n", + " if mode == tf.estimator.ModeKeys.PREDICT:\n", + " pass\n", + " if mode == tf.estimator.ModeKeys.EVAL:\n", + " pass\n", + " assert mode == tf.estimator.ModeKeys.TRAIN\n", + "\n", + " global_step = tf.compat.v1.train.get_or_create_global_step()\n", + " lr = tf.compat.v1.train.polynomial_decay(\n", + " learning_rate=params['init_lr'],\n", + " global_step=global_step,\n", + " decay_steps=params['decay_steps'],\n", + " end_learning_rate=params['end_lr'],\n", + " power=params['lr_power'])\n", + " \n", + " optimizer = tf.compat.v1.train.GradientDescentOptimizer(lr)\n", + " grads_and_vars = optimizer.compute_gradients(\n", + " loss=loss,\n", + " var_list=graph.get_collection(\n", + " tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES))\n", + " train_op = optimizer.apply_gradients(\n", + " grads_and_vars,\n", + " global_step=global_step)\n", + " \n", + " # log tensors\n", + " self.logged_ops['step'] = global_step\n", + " self.logged_ops['lr'] = lr\n", + " self.logged_ops['loss'] = loss\n", + " self.logged_ops['grads_and_vars'] = grads_and_vars\n", + " self.logged_ops['layer_out'] = {\n", + " 'layer_1': out_1,\n", + " 'layer_2': out_2,\n", + " 'logits': logits}\n", + "\n", + " return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)\n", + "\n", + " def update_logs(self, logs):\n", + " for key in logs.keys():\n", + " self.logs[key].append(logs[key])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kki9yILSKS7f" + }, + "source": [ + "The following [`v1.keras.utils.DeterministicRandomTestTool`](https://www.tensorflow.org/api_docs/python/tf/compat/v1/keras/utils/DeterministicRandomTestTool) class provides a context manager `scope()` that can make stateful random operations use the same seed across both TF1 graphs/sessions and eager execution.\n", + "\n", + "The tool provides two testing modes: \n", + "1. `constant`, which uses the same seed for every single operation no matter how many times it has been called, and\n", + "2. `num_random_ops`, which uses the number of previously-observed stateful random operations as the operation seed.\n", + "\n", + "This applies both to the stateful random operations used for creating and initializing variables, and to the stateful random operations used in computation (such as for dropout layers)."
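Before the tool is applied to the models in this notebook, here is a small, self-contained sketch (not part of the original notebook) of the guarantee it provides in `num_random_ops` mode: two independent runs of the same random-drawing code under fresh scopes produce op-for-op identical values, which is what makes the TF1.x-versus-TF2 comparisons below meaningful.

```python
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as v1

def draw_random_values():
  # Stand-in for "build and initialize a model": two stateful random ops.
  return tf.random.normal((2, 2)), tf.random.uniform((2, 2))

random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')
with random_tool.scope():
  first_run = draw_random_values()

random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')
with random_tool.scope():
  second_run = draw_random_values()

# Each op's seed depends only on how many random ops ran before it within the
# scope, so the two runs line up op for op.
for a, b in zip(first_run, second_run):
  np.testing.assert_allclose(a.numpy(), b.numpy())
```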
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X6Y3RWMoKOl8" + }, + "outputs": [], + "source": [ + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mk5-ZzxcErX5" + }, + "source": [ + "Run the TF1.x model in graph mode. Collect statistics for first 3 training steps for numerical equivalence comparison." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r5zhJHvsWA24" + }, + "outputs": [], + "source": [ + "with random_tool.scope():\n", + " graph = tf.Graph()\n", + " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", + " model_tf1 = SimpleModelWrapper()\n", + " # build the model\n", + " inputs = tf.compat.v1.placeholder(tf.float32, shape=(None, params['input_size']))\n", + " labels = tf.compat.v1.placeholder(tf.float32, shape=(None, params['num_classes']))\n", + " spec = model_tf1.model_fn(inputs, labels, tf.estimator.ModeKeys.TRAIN, params)\n", + " train_op = spec.train_op\n", + "\n", + " sess.run(tf.compat.v1.global_variables_initializer())\n", + " for step in range(step_num):\n", + " # log everything and update the model for one step\n", + " logs, _ = sess.run(\n", + " [model_tf1.logged_ops, train_op],\n", + " feed_dict={inputs: fake_x, labels: fake_y})\n", + " model_tf1.update_logs(logs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eZxjI8Nxz9Ea" + }, + "source": [ + "Define the TF2 model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AA67rh2TkS1M" + }, + "outputs": [], + "source": [ + "class SimpleModel(tf.keras.Model):\n", + " def __init__(self, params, *args, **kwargs):\n", + " super(SimpleModel, self).__init__(*args, **kwargs)\n", + " # define the model\n", + " self.dense_1 = tf.keras.layers.Dense(params['layer_1_size'])\n", + " self.dense_2 = tf.keras.layers.Dense(params['layer_2_size'])\n", + " self.out = tf.keras.layers.Dense(params['num_classes'])\n", + " learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(\n", + " initial_learning_rate=params['init_lr'],\n", + " decay_steps=params['decay_steps'],\n", + " end_learning_rate=params['end_lr'],\n", + " power=params['lr_power']) \n", + " self.optimizer = tf.keras.optimizers.legacy.SGD(learning_rate_fn)\n", + " self.compiled_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n", + " self.logs = {\n", + " 'lr': [],\n", + " 'loss': [],\n", + " 'grads': [],\n", + " 'weights': [],\n", + " 'layer_out': []}\n", + "\n", + " def call(self, inputs):\n", + " out_1 = self.dense_1(inputs)\n", + " out_2 = self.dense_2(out_1)\n", + " logits = self.out(out_2)\n", + " # log output features for every layer for comparison\n", + " layer_wise_out = {\n", + " 'layer_1': out_1,\n", + " 'layer_2': out_2,\n", + " 'logits': logits}\n", + " self.logs['layer_out'].append(layer_wise_out)\n", + " return logits\n", + "\n", + " def train_step(self, data):\n", + " x, y = data\n", + " with tf.GradientTape() as tape:\n", + " logits = self(x)\n", + " loss = self.compiled_loss(y, logits)\n", + " grads = tape.gradient(loss, self.trainable_weights)\n", + " # log training statistics\n", + " step = self.optimizer.iterations.numpy()\n", + " self.logs['lr'].append(self.optimizer.learning_rate(step).numpy())\n", + " self.logs['loss'].append(loss.numpy())\n", + " self.logs['grads'].append(grads)\n", + " self.logs['weights'].append(self.trainable_weights)\n", + " # update model\n", + " 
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))\n", + " return" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I5smAcaEE8nX" + }, + "source": [ + "Run the TF2 model in eager mode. Collect statistics for first 3 training steps for numerical equivalence comparison." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q0AbXF_eE8cS" + }, + "outputs": [], + "source": [ + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", + "with random_tool.scope():\n", + " model_tf2 = SimpleModel(params)\n", + " for step in range(step_num):\n", + " model_tf2.train_step([fake_x, fake_y])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cjJDjLcAz_gU" + }, + "source": [ + "Compare numerical equivalence for first few training steps.\n", + "\n", + "You can also check the [Validating correctness & numerical equivalence notebook](./validate_correctness.ipynb) for additional advice for numerical equivalence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6CbCUbsCiabC" + }, + "outputs": [], + "source": [ + "np.testing.assert_allclose(model_tf1.logs['lr'], model_tf2.logs['lr'])\n", + "np.testing.assert_allclose(model_tf1.logs['loss'], model_tf2.logs['loss'])\n", + "for step in range(step_num):\n", + " for name in model_tf1.logs['layer_out'][step]:\n", + " np.testing.assert_allclose(\n", + " model_tf1.logs['layer_out'][step][name],\n", + " model_tf2.logs['layer_out'][step][name])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dhVuuciimLIY" + }, + "source": [ + "#### Unit tests" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sXZYFC6Hhqeb" + }, + "source": [ + "There are a few types of unit testing that can help debug your migration code.\n", + "1. Single forward pass validation\n", + "2. Model training numerical equivalence validation for a few steps\n", + "3. Benchmark inference performance\n", + "4. The trained model makes correct predictions on fixed and simple data points\n", + "\n", + "You can use `@parameterized.parameters` to test models with different configurations. [Details with code sample](https://github.com/abseil/abseil-py/blob/master/absl/testing/parameterized.py).\n", + "\n", + "Note that it's possible to run session APIs and eager execution in the same test case. The code snippets below show how." 
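In addition to the combined session-and-eager test case shown next, the `@parameterized.parameters` idea mentioned above can be sketched roughly as follows. This is illustrative only: the tolerance values and loss lists are placeholders rather than results from this notebook, and mixing in `tf.test.TestCase` supplies `assertAllClose`.

```python
from absl.testing import parameterized
import tensorflow as tf

class ToleranceSweepTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      (1e-5, 0.0),  # tight absolute tolerance only
      (0.0, 0.05),  # 5% relative tolerance only
  )
  def test_losses_close(self, atol, rtol):
    # Hypothetical recorded losses; in practice these would come from the
    # `model_tf1.logs` and `model_tf2.logs` dictionaries built above.
    tf1_losses = [0.101, 0.099, 0.098]
    tf2_losses = [0.101, 0.099, 0.098]
    self.assertAllClose(tf1_losses, tf2_losses, atol=atol, rtol=rtol)

if __name__ == '__main__':
  tf.test.main()
```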
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CdHqkgPPM2Bj" + }, + "outputs": [], + "source": [ + "import unittest\n", + "\n", + "class TestNumericalEquivalence(unittest.TestCase):\n", + "\n", + " # copied from code samples above\n", + " def setup(self):\n", + " # record statistics for 100 training steps\n", + " step_num = 100\n", + "\n", + " # setup TF 1 model\n", + " random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", + " with random_tool.scope():\n", + " # run TF1.x code in graph mode with context management\n", + " graph = tf.Graph()\n", + " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", + " self.model_tf1 = SimpleModelWrapper()\n", + " # build the model\n", + " inputs = tf.compat.v1.placeholder(tf.float32, shape=(None, params['input_size']))\n", + " labels = tf.compat.v1.placeholder(tf.float32, shape=(None, params['num_classes']))\n", + " spec = self.model_tf1.model_fn(inputs, labels, tf.estimator.ModeKeys.TRAIN, params)\n", + " train_op = spec.train_op\n", + "\n", + " sess.run(tf.compat.v1.global_variables_initializer())\n", + " for step in range(step_num):\n", + " # log everything and update the model for one step\n", + " logs, _ = sess.run(\n", + " [self.model_tf1.logged_ops, train_op],\n", + " feed_dict={inputs: fake_x, labels: fake_y})\n", + " self.model_tf1.update_logs(logs)\n", + "\n", + " # setup TF2 model\n", + " random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", + " with random_tool.scope():\n", + " self.model_tf2 = SimpleModel(params)\n", + " for step in range(step_num):\n", + " self.model_tf2.train_step([fake_x, fake_y])\n", + " \n", + " def test_learning_rate(self):\n", + " np.testing.assert_allclose(\n", + " self.model_tf1.logs['lr'],\n", + " self.model_tf2.logs['lr'])\n", + "\n", + " def test_training_loss(self):\n", + " # adopt different tolerance strategies before and after 10 steps\n", + " first_n_step = 10\n", + "\n", + " # absolute difference is limited below 1e-5\n", + " # set `equal_nan` to be False to detect potential NaN loss issues\n", + " abosolute_tolerance = 1e-5\n", + " np.testing.assert_allclose(\n", + " actual=self.model_tf1.logs['loss'][:first_n_step],\n", + " desired=self.model_tf2.logs['loss'][:first_n_step],\n", + " atol=abosolute_tolerance,\n", + " equal_nan=False)\n", + " \n", + " # relative difference is limited below 5%\n", + " relative_tolerance = 0.05\n", + " np.testing.assert_allclose(self.model_tf1.logs['loss'][first_n_step:],\n", + " self.model_tf2.logs['loss'][first_n_step:],\n", + " rtol=relative_tolerance,\n", + " equal_nan=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gshSQdKIddpZ" + }, + "source": [ + "## Debugging tools" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CkMfCaJRclKv" + }, + "source": [ + "### tf.print\n", + "\n", + "tf.print vs print/logging.info\n", + "\n", + "- With configurable arguments, `tf.print` can recursively display the first and last few elements of each dimension for printed tensors. Check the [API docs](https://www.tensorflow.org/api_docs/python/tf/print) for details.\n", + "- For eager execution, both `print` and `tf.print` print the value of the tensor. But `print` may involve device-to-host copy, which can potentially slow down your code. \n", + "- For graph mode including usage inside `tf.function`, you need to use `tf.print` to print the actual tensor value. 
`tf.print` is compiled into an op in the graph, whereas `print` and `logging.info` only log at tracing time, which is often not what you want. \n", + "- `tf.print` also supports printing composite tensors like `tf.RaggedTensor` and `tf.sparse.SparseTensor`.\n", + "- You can also use a callback to monitor metrics and variables. Please check how to use custom callbacks with [logs dict](https://www.tensorflow.org/guide/keras/custom_callback#usage_of_logs_dict) and [self.model attribute](https://www.tensorflow.org/guide/keras/custom_callback#usage_of_selfmodel_attribute)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S-5h3cX8Dc50" + }, + "source": [ + "tf.print vs print inside tf.function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gRED9FMyDKih" + }, + "outputs": [], + "source": [ + "# `print` prints info of tensor object\n", + "# `tf.print` prints the tensor value\n", + "@tf.function\n", + "def dummy_func(num):\n", + " num += 1\n", + " print(num)\n", + " tf.print(num)\n", + " return num\n", + "\n", + "_ = dummy_func(tf.constant([1.0]))\n", + "\n", + "# Output:\n", + "# Tensor(\"add:0\", shape=(1,), dtype=float32)\n", + "# [2]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3QroLA_zDK2w" + }, + "source": [ + "tf.distribute.Strategy\n", + "\n", + "- If the `tf.function` containing `tf.print` is executed on the workers, for example when using `TPUStrategy` or `ParameterServerStrategy`, you need to check worker/parameter server logs to find the printed values.\n", + "- For `print` or `logging.info`, logs will be printed on the coordinator when using `ParameterServerStrategy`, and logs will be printed on the STDOUT on worker0 when using TPUs.\n", + "\n", + "tf.keras.Model\n", + "- When using Sequential and Functional API models, if you want to print values, e.g., model inputs or intermediate features after some layers, you have following options.\n", + " 1. [Write a custom layer](https://www.tensorflow.org/guide/keras/custom_layers_and_models) that `tf.print` the inputs. \n", + " 2. Include the intermediate outputs you want to inspect in the model outputs.\n", + "- `tf.keras.layers.Lambda` layers have (de)serialization limitations. To avoid checkpoint loading issues, write a custom subclassed layer instead. Check the [API docs](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Lambda) for more details. 
\n", + "- You can't `tf.print` intermediate outputs in a `tf.keras.callbacks.LambdaCallback` if you don't have access to the actual values, but instead only to the symbolic Keras tensor objects.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aKazGTr1ZUMG" + }, + "source": [ + "Option 1: write a custom layer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8w4aY7wO0B4W" + }, + "outputs": [], + "source": [ + "class PrintLayer(tf.keras.layers.Layer):\n", + " def call(self, inputs):\n", + " tf.print(inputs)\n", + " return inputs\n", + "\n", + "def get_model():\n", + " inputs = tf.keras.layers.Input(shape=(1,))\n", + " out_1 = tf.keras.layers.Dense(4)(inputs)\n", + " out_2 = tf.keras.layers.Dense(1)(out_1)\n", + " # use custom layer to tf.print intermediate features\n", + " out_3 = PrintLayer()(out_2)\n", + " model = tf.keras.Model(inputs=inputs, outputs=out_3)\n", + " return model\n", + "\n", + "model = get_model()\n", + "model.compile(optimizer=\"adam\", loss=\"mse\")\n", + "model.fit([1, 2, 3], [0.0, 0.0, 1.0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KNESOatq7iM9" + }, + "source": [ + "Option 2: include the intermediate outputs you want to inspect in the model outputs.\n", + "\n", + "Note that in such case, you may need some [customizations](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) to use `Model.fit`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MiifvdLk7g9J" + }, + "outputs": [], + "source": [ + "def get_model():\n", + " inputs = tf.keras.layers.Input(shape=(1,))\n", + " out_1 = tf.keras.layers.Dense(4)(inputs)\n", + " out_2 = tf.keras.layers.Dense(1)(out_1)\n", + " # include intermediate values in model outputs\n", + " model = tf.keras.Model(\n", + " inputs=inputs,\n", + " outputs={\n", + " 'inputs': inputs,\n", + " 'out_1': out_1,\n", + " 'out_2': out_2})\n", + " return model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MvIKDZpHSLmQ" + }, + "source": [ + "### pdb\n", + "You can use [pdb](https://docs.python.org/3/library/pdb.html) both in terminal and Colab to inspect intermediate values for debugging.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qu0n4O2umyT7" + }, + "source": [ + "### Visualize graph with TensorBoard\n", + "\n", + "You can [examine the TensorFlow graph with TensorBoard](https://www.tensorflow.org/tensorboard/graphs). TensorBoard is also [supported on colab](https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks). TensorBoard is a great tool to visualize summaries. You can use it to compare learning rate, model weights, gradient scale, training/validation metrics, or even model intermediate outputs between TF1.x model and migrated TF2 model through the training process and seeing if the values look as expected." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vBnxB6_xzlnT" + }, + "source": [ + "### TensorFlow Profiler\n", + "\n", + "[TensorFlow Profiler](https://www.tensorflow.org/guide/profiler) can help you visualize the execution timeline on GPUs/TPUs. You can check out this [Colab Demo](https://www.tensorflow.org/tensorboard/tensorboard_profiling_keras) for its basic usage." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9wNmCSHBpiGM" + }, + "source": [ + "### MultiProcessRunner\n", + "[MultiProcessRunner](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/distribute/multi_process_runner.py#L108) is a useful tool when debugging with MultiWorkerMirroredStrategy and ParameterServerStrategy. You can take a look at [this concrete example](https://github.com/keras-team/keras/blob/master/keras/integration_test/mwms_multi_process_runner_test.py) for its usage.\n", + "\n", + "Specifically for these two strategies, it is recommended that you 1) have unit tests covering their flow, and 2) try to reproduce failures with `MultiProcessRunner` in a unit test, so that you avoid launching a real distributed job every time you attempt a fix." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "migration_debugging.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/migrate/model_mapping.ipynb b/site/en/guide/migrate/model_mapping.ipynb index 9970405c45e..2d4582839c0 100644 --- a/site/en/guide/migrate/model_mapping.ipynb +++ b/site/en/guide/migrate/model_mapping.ipynb @@ -123,8 +123,9 @@ }, "outputs": [], "source": [ - "# Install tf-nightly as the model mapping shim is available only in \n", - "# TensorFlow 2.7\n", + "# Install tf-nightly as the DeterministicRandomTestTool is available only in\n", + "# TensorFlow 2.8\n", + "\n", "!pip install -q tf-nightly" ] }, @@ -141,7 +142,6 @@ "import sys\n", "import numpy as np\n", "\n", - "from unittest import mock\n", "from contextlib import contextmanager" ] }, @@ -617,65 +617,71 @@ "source": [ "## Nesting `tf.Variable`s, `tf.Module`s, `tf.keras.layers` & `tf.keras.models` in decorated calls\n", "\n", - "Decorating your layer call in `tf.compat.v1.keras.utils.track_tf1_style_variables` will only add automatic implicit tracking of variables created (and reused) via `tf.compat.v1.get_variable`. It will not capture weights directly created by `tf.Variable` calls, such as those used by typical Keras layers and most `tf.Module`s. You still need to explicitly track these in the same way you would for any other Keras layer or `tf.Module`.\n", - "\n", - "If you need to embed `tf.Variable` calls, Keras layers/models, or `tf.Module`s in your decorators (either because you are following the incremental migration to Native TF2 described later in this guide, or because your TF1.x code partially consisted of Keras modules):\n", - "* Explicitly make sure that the variable/module/layer is only created once\n", - "* Explicitly attach them as instance attributes just as you would when defining a [typical module/layer](https://www.tensorflow.org/guide/intro_to_modules#defining_models_and_layers_in_tensorflow)\n", - "* Explicitly reuse the already-created object in follow-on calls\n", + "Decorating your layer call in `tf.compat.v1.keras.utils.track_tf1_style_variables` will only add automatic implicit tracking of variables created (and reused) via `tf.compat.v1.get_variable`. It will not capture weights directly created by `tf.Variable` calls, such as those used by typical Keras layers and most `tf.Module`s. 
This section describes how to handle these nested cases.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Azxza3bVOZlv" + }, + "source": [ + "### (Pre-existing usages) `tf.keras.layers` and `tf.keras.models`\n", "\n", - "This ensures that weights are not created new and are correctly resued. Additionally, this also ensures that existing weights and regularization losses get tracked.\n", + "For pre-existing usages of nested Keras layers and models, use `tf.compat.v1.keras.utils.get_or_create_layer`. This is only recommended for easing migration of existing TF1.x nested Keras usages; new code should use explicit attribute setting as described below for tf.Variables and tf.Modules.\n", "\n", - "Here is an example of how this could look:" + "To use `tf.compat.v1.keras.utils.get_or_create_layer`, wrap the code that constructs your nested model into a method, and pass it in to the method. Example:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "mrRPPoJ5ap5U" + "id": "LN15TcRgHKsq" }, "outputs": [], "source": [ - "class WrappedDenseLayer(tf.keras.layers.Layer):\n", + "class NestedModel(tf.keras.Model):\n", "\n", - " def __init__(self, units, **kwargs):\n", - " super().__init__(**kwargs)\n", + " def __init__(self, units, *args, **kwargs):\n", + " super().__init__(*args, **kwargs)\n", " self.units = units\n", - " self._dense_model = None\n", + "\n", + " def build_model(self):\n", + " inp = tf.keras.Input(shape=(5, 5))\n", + " dense_layer = tf.keras.layers.Dense(\n", + " 10, name=\"dense\", kernel_regularizer=\"l2\",\n", + " kernel_initializer=tf.compat.v1.ones_initializer())\n", + " model = tf.keras.Model(inputs=inp, outputs=dense_layer(inp))\n", + " return model\n", "\n", " @tf.compat.v1.keras.utils.track_tf1_style_variables\n", " def call(self, inputs):\n", - " # Create the nested tf.variable/module/layer/model\n", - " # only if it has not been created already\n", - " if not self._dense_model:\n", - " inp = tf.keras.Input(shape=inputs.shape)\n", - " dense_layer = tf.keras.layers.Dense(\n", - " self.units, name=\"dense\",\n", - " kernel_regularizer=\"l2\")\n", - " self._dense_model = tf.keras.Model(\n", - " inputs=inp, outputs=dense_layer(inp))\n", - " return self._dense_model(inputs)\n", - "\n", - "layer = WrappedDenseLayer(10)\n", + " # Get or create a nested model without assigning it as an explicit property\n", + " model = tf.compat.v1.keras.utils.get_or_create_layer(\n", + " \"dense_model\", self.build_model)\n", + " return model(inputs)\n", "\n", - "layer(tf.ones(shape=(5, 5)))" + "layer = NestedModel(10)\n", + "layer(tf.ones(shape=(5,5)))" ] }, { "cell_type": "markdown", "metadata": { - "id": "Lo9h6wc6bmEF" + "id": "DgsKlltPHI8z" }, "source": [ - "The weights are correctly tracked:" + "This method ensures that these nested layers are correctly reused and tracked by tensorflow. Note that the `@track_tf1_style_variables` decorator is still required on the appropriate method. 
The model builder method passed into `get_or_create_layer` (in this case, `self.build_model`), should take no arguments.\n", + "\n", + "Weights are tracked:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Qt6USaTVbauM" + "id": "3zO5A78MJsqO" }, "outputs": [], "source": [ @@ -690,55 +696,46 @@ { "cell_type": "markdown", "metadata": { - "id": "oyH4lIcPb45r" + "id": "o3Xsi-JbKTuj" }, "source": [ - "As is the regularization loss (if present):" + "And regularization loss as well:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "N7cmuhRGbfFt" + "id": "mdK5RGm5KW5C" }, "outputs": [], "source": [ - "regularization_loss = tf.add_n(layer.losses)\n", - "regularization_loss" + "tf.add_n(layer.losses)" ] }, { "cell_type": "markdown", "metadata": { - "id": "FsTgnydkdezQ" + "id": "J_VRycQYJrXu" }, "source": [ - "### Guidance on variable names\n", - "\n", - "Explicit `tf.Variable` calls and Keras layers use a different layer name / variable name autogeneration mechanism than you may be used to from the combination of `get_variable` and `variable_scopes`. Although the shim will make your variable names match for variables created by `get_variable` even when going from TF1.x graphs to TF2 eager execution & `tf.function`, it cannot guarantee the same for the variable names generated for `tf.Variable` calls and Keras layers that you embed within your method decorators. It is even possible for multiple variables to share the same name in TF2 eager execution and `tf.function`.\n", + "### Incremental migration: `tf.Variables` and `tf.Modules`\n", "\n", - "You should take special care with this when following the sections on validating correctness and mapping TF1.x checkpoints later on in this guide." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mSFaHTCvhUso" - }, - "source": [ - "### Nesting layers/modules that use `@track_tf1_style_variables`\n", + "If you need to embed `tf.Variable` calls or `tf.Module`s in your decorated methods (for example, if you are following the incremental migration to non-legacy TF2 APIs described later in this guide), you still need to explicitly track these, with the following requirements:\n", + "* Explicitly make sure that the variable/module/layer is only created once\n", + "* Explicitly attach them as instance attributes just as you would when defining a [typical module or layer](https://www.tensorflow.org/guide/intro_to_modules#defining_models_and_layers_in_tensorflow)\n", + "* Explicitly reuse the already-created object in follow-on calls\n", "\n", - "If you are nesting one layer that uses the `@track_tf1_style_variables` decorator inside of another, you should treat it the same way you would treat any Keras layer or `tf.Module` that did not use `get_variable` to create its variables.\n", + "This ensures that weights are not created new each call and are correctly reused. 
Additionally, this also ensures that existing weights and regularization losses get tracked.\n", "\n", - "For example," + "Here is an example of how this could look:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "SI5V-1JLhTfW" + "id": "mrRPPoJ5ap5U" }, "outputs": [], "source": [ @@ -749,9 +746,9 @@ " self.units = units\n", "\n", " @tf.compat.v1.keras.utils.track_tf1_style_variables\n", - " def call(self, inputs):\n", + " def __call__(self, inputs):\n", " out = inputs\n", - " with tf.compat.v1.variable_scope(\"dense\"):\n", + " with tf.compat.v1.variable_scope(\"inner_dense\"):\n", " # The weights are created with a `regularizer`,\n", " # so the layer should track their regularization losses\n", " kernel = tf.compat.v1.get_variable(\n", @@ -785,29 +782,81 @@ "\n", "layer = WrappedDenseLayer(10)\n", "\n", - "layer(tf.ones(shape=(5, 5)))\n", + "layer(tf.ones(shape=(5, 5)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lo9h6wc6bmEF" + }, + "source": [ + "Note that explicit tracking of the nested module is needed even though it is decorated with the `track_tf1_style_variables` decorator. This is because each module/layer with decorated methods has its own variable store associated with it. \n", + "\n", + "The weights are correctly tracked:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qt6USaTVbauM" + }, + "outputs": [], + "source": [ + "assert len(layer.weights) == 6\n", + "weights = {x.name: x for x in layer.variables}\n", + "\n", + "assert set(weights.keys()) == {\"outer/inner_dense/bias:0\",\n", + " \"outer/inner_dense/kernel:0\",\n", + " \"outer/dense/bias:0\",\n", + " \"outer/dense/kernel:0\",\n", + " \"outer/dense_1/bias:0\",\n", + " \"outer/dense_1/kernel:0\"}\n", "\n", - "# Recursively track weights and regularization losses\n", - "layer.trainable_weights\n", + "layer.trainable_weights" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dHn-bJoNJw7l" + }, + "source": [ + "As well as regularization loss:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pq5GFtXjJyut" + }, + "outputs": [], + "source": [ "layer.losses" ] }, { "cell_type": "markdown", "metadata": { - "id": "DkEkLnGbipSS" + "id": "p7VKJj3JOCEk" }, "source": [ - "Notice that `variable_scope`s set in the outer layer may affect the naming of variables set in the nested layer, *but* `get_variable` will not share variables by name across the outer shim-based layer and the nested shim-based layer even if they have the same name, because the nested and outer layer utilize different internal variable stores." + "Note that if the `NestedLayer` were a non-Keras `tf.Module` instead, variables would still be tracked but regularization losses would not be automatically tracked, so you would have to explicitly track them separately." ] }, { "cell_type": "markdown", "metadata": { - "id": "PfbiY08UizLz" + "id": "FsTgnydkdezQ" }, "source": [ - "As mentioned previously, if you are using a shim-decorated `tf.Module` there is no `losses` property to recursively and automatically track the regularization loss of your nested layer, and you will have to track it separately." + "### Guidance on variable names\n", + "\n", + "Explicit `tf.Variable` calls and Keras layers use a different layer name / variable name autogeneration mechanism than you may be used to from the combination of `get_variable` and `variable_scopes`. 
Although the shim will make your variable names match for variables created by `get_variable` even when going from TF1.x graphs to TF2 eager execution & `tf.function`, it cannot guarantee the same for the variable names generated for `tf.Variable` calls and Keras layers that you embed within your method decorators. It is even possible for multiple variables to share the same name in TF2 eager execution and `tf.function`.\n", + "\n", + "You should take special care with this when following the sections on validating correctness and mapping TF1.x checkpoints later on in this guide." ] }, { @@ -986,69 +1035,7 @@ "id": "kzJF0H0sbce8" }, "source": [ - "Use the deterministic number generation test tool to verify that this incremental change leaves the model with the same behavior as before." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VRTg0bQlcPeP" - }, - "outputs": [], - "source": [ - "# import tensorflow.python.framework.random_seed as random_seed\n", - "seed_implementation = sys.modules[tf.compat.v1.get_seed.__module__]\n", - "\n", - "class DeterministicTestTool(object):\n", - " def __init__(self, seed: int = 42, mode='constant'):\n", - " \"\"\"Set mode to 'constant' or 'num_random_ops'. Defaults to 'constant'.\"\"\"\n", - " if mode not in {'constant', 'num_random_ops'}:\n", - " raise ValueError(\"Mode arg must be 'constant' or 'num_random_ops'. \" +\n", - " \"Got: {}\".format(mode))\n", - "\n", - " self._mode = mode\n", - " self._seed = seed\n", - " self.operation_seed = 0\n", - " self._observed_seeds = set()\n", - "\n", - " def scope(self):\n", - " tf.random.set_seed(self._seed)\n", - "\n", - " def _get_seed(_):\n", - " \"\"\"Wraps TF get_seed to make deterministic random generation easier.\n", - "\n", - " This makes a variable's initialization (and calls that involve random\n", - " number generation) depend only on how many random number generations\n", - " were used in the scope so far, rather than on how many unrelated\n", - " operations the graph contains.\n", - "\n", - " Returns:\n", - " Random seed tuple.\n", - " \"\"\"\n", - " op_seed = self.operation_seed\n", - " if self._mode == \"constant\":\n", - " tf.random.set_seed(op_seed)\n", - " else:\n", - " if op_seed in self._observed_seeds:\n", - " raise ValueError(\n", - " 'This `DeterministicTestTool` object is trying to re-use the ' +\n", - " 'already-used operation seed {}. '.format(op_seed) +\n", - " 'It cannot guarantee random numbers will match between eager ' +\n", - " 'and sessions when an operation seed is reused. ' +\n", - " 'You most likely set ' +\n", - " '`operation_seed` explicitly but used a value that caused the ' +\n", - " 'naturally-incrementing operation seed sequences to overlap ' +\n", - " 'with an already-used seed.')\n", - "\n", - " self._observed_seeds.add(op_seed)\n", - " self.operation_seed += 1\n", - "\n", - " return (self._seed, op_seed)\n", - "\n", - " # mock.patch internal symbols to modify the behavior of TF APIs relying on them\n", - "\n", - " return mock.patch.object(seed_implementation, 'get_seed', wraps=_get_seed)" + "Use the [`v1.keras.utils.DeterministicRandomTestTool`](https://www.tensorflow.org/api_docs/python/tf/compat/v1/keras/utils/DeterministicRandomTestTool) class to verify that this incremental change leaves the model with the same behavior as before." 
] }, { @@ -1059,8 +1046,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = CompatModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1080,8 +1068,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = PartiallyMigratedModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1153,8 +1142,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = NearlyFullyNativeModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1226,8 +1216,9 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = FullyNativeModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1262,7 +1253,7 @@ "\n", "The above migration process to native TF2 APIs changed both the variable names (as Keras APIs produce very different weight names), and the object-oriented paths that point to different weights in the model. The impact of these changes is that they will have broken both any existing TF1-style name-based checkpoints or TF2-style object-oriented checkpoints.\n", "\n", - "However, in some cases, you might be able to take your original name-based checkpoint and find a mapping of the variables to their new names with approaches like the one detailed in the [Reusing TF1.x checkpoints guide](./reusing_checkpoints.ipynb).\n", + "However, in some cases, you might be able to take your original name-based checkpoint and find a mapping of the variables to their new names with approaches like the one detailed in the [Reusing TF1.x checkpoints guide](./migrating_checkpoints.ipynb).\n", "\n", "Some tips to making this feasible are as follows:\n", "- Variables still all have a `name` argument you can set.\n", @@ -1527,7 +1518,6 @@ "colab": { "collapsed_sections": [], "name": "model_mapping.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb b/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb index 48cb8823262..8a95cb903d6 100644 --- a/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb +++ b/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb @@ -105,10 +105,8 @@ }, "outputs": [], "source": [ - "# Install tf-nightly as the notebook uses a dataset instance for `Model.fit`\n", - "# with `ParameterServerStrategy`, which depends on symbols in TF 2.7.\n", - "!pip uninstall -q -y tensorflow keras\n", - "!pip install -q tf-nightly\n", + "# The notebook uses a dataset instance for `Model.fit` with\n", + "# `ParameterServerStrategy`, which depends on symbols in TF 2.7.\n", "# Install a utility needed for this demonstration\n", "!pip install portpicker\n", "\n", @@ -371,7 +369,7 @@ "\n", "with 
strategy.scope():\n", " model = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])\n", - " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", + " optimizer = tf.keras.optimizers.legacy.Adagrad(learning_rate=0.05)\n", " model.compile(optimizer, \"mse\")\n", "\n", "model.fit(dataset, epochs=5, steps_per_epoch=10)" @@ -466,7 +464,6 @@ "colab": { "collapsed_sections": [], "name": "multi_worker_cpu_gpu_training.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/saved_model.ipynb b/site/en/guide/migrate/saved_model.ipynb index f1e888ca272..e7e8ce8daa1 100644 --- a/site/en/guide/migrate/saved_model.ipynb +++ b/site/en/guide/migrate/saved_model.ipynb @@ -168,7 +168,7 @@ }, "outputs": [], "source": [ - "!saved_model_cli run --dir simple-save --tag_set serve \\\n", + "!saved_model_cli run --dir saved-model-builder --tag_set serve \\\n", " --signature_def serving_default --input_exprs input=10" ] }, @@ -303,7 +303,7 @@ "source": [ "### Save and export a SavedModel defined with tf.Module\n", "\n", - "To export your model in TensorFlow 2, you must define a `tf.Module` or a `tf.keras.Model` to hold all of your model's variables and functions. Then, you can call `tf.saved_model.save` to create a SavedModel. Refer to [Saving a custom model](../../guide/saved_model#saving_a_custom_model) in the [Using the SavedModel format](../../guide/saved_model) guide to learn more." + "To export your model in TensorFlow 2, you must define a `tf.Module` or a `tf.keras.Model` to hold all of your model's variables and functions. Then, you can call `tf.saved_model.save` to create a SavedModel. Refer to the _Saving a custom model_ section in the [Using the SavedModel format](../saved_model.ipynb) guide to learn more." ] }, { @@ -352,7 +352,11 @@ "source": [ "### Save and export a SavedModel defined with Keras\n", "\n", - "The Keras APIs for saving and exporting—`Mode.save` or `tf.keras.models.save_model`—can export a SavedModel from a `tf.keras.Model`. Check out the [Save and load Keras models](../..guide/keras/save_and_serialize) for more details." + "\n", + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code.\n", + "\n", + "\n", + "The Keras APIs for saving and exporting—`Model.save` or `tf.keras.models.save_model`—can export a SavedModel from a `tf.keras.Model`. Check out the [Save and load Keras models](../..guide/keras/save_and_serialize) for more details." ] }, { @@ -395,7 +399,7 @@ "source": [ "## Loading a SavedModel\n", "\n", - "A SavedModel saved with any of the above APIs can be loaded using either TensorFlow 1 or TensorFlow APIs.\n", + "A SavedModel saved with any of the above APIs can be loaded using either TensorFlow 1 or TensorFlow 2 APIs.\n", "\n", "A TensorFlow 1 SavedModel can generally be used for inference when loaded into TensorFlow 2, but training (generating gradients) is only possible if the SavedModel contains *resource variables*. 
You can check the dtype of the variables—if the variable dtype contains \"_ref\", then it is a reference variable.\n", "\n", @@ -506,9 +510,12 @@ "source": [ "### TensorFlow 2: Load a model saved with Keras\n", "\n", + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code.\n", + "\n", + "\n", "The Keras loading API—`tf.keras.models.load_model`—allows you to reload a saved model back into a Keras Model object. Note that this only allows you to load SavedModels saved with Keras (`Model.save` or `tf.keras.models.save_model`).\n", "\n", - "Models saved with `tf.saved_model.save` should be loaded with `tf.saved_model.load`. You can load a Keras model saved with `Model.save` using `tf.saved_model.load` but you will only get the TensorFlow graph. Refer to the `tf.keras.models.load_model` API docs and [Save and load Keras models](../../guide/keras/save_and_serialize#savedmodel_format) guide for details." + "Models saved with `tf.saved_model.save` should be loaded with `tf.saved_model.load`. You can load a Keras model saved with `Model.save` using `tf.saved_model.load` but you will only get the TensorFlow graph. Refer to the `tf.keras.models.load_model` API docs and [Save and load Keras models](https://www.tensorflow.org/guide/keras/save_and_serialize#savedmodel_format) guide for details." ] }, { @@ -655,7 +662,7 @@ "\n", "Signatures are the endpoints of a SavedModel—they tell the user how to run the model and what inputs are needed.\n", "\n", - "In TensorFlow 1, signatures are created by listing the input and output tensors. In TensorFlow 2, signatures are generated by passing in *concrete functions*. (Read more about TensorFlow functions in the [Introduction to graphs and tf.function](../guide/intro_to_graphs) guide.) In short, [a concrete function is generated](../guide/intro_to_graphs#polymorphism_one_function_many_graphs) from a `tf.function`:\n", + "In TensorFlow 1, signatures are created by listing the input and output tensors. In TensorFlow 2, signatures are generated by passing in *concrete functions*. (Read more about TensorFlow functions in the [Introduction to graphs and tf.function](../intro_to_graphs.ipynb) guide, particularly the _Polymorphism: one Function, many graphs_ section.) In short, a concrete function is generated from a `tf.function`:\n", "\n", "```python\n", "# Option 1: Specify an input signature.\n", @@ -748,7 +755,6 @@ "colab": { "collapsed_sections": [], "name": "saved_model.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/sessionrunhook_callback.ipynb b/site/en/guide/migrate/sessionrunhook_callback.ipynb index 60a7df4ed63..7e20a1bab05 100644 --- a/site/en/guide/migrate/sessionrunhook_callback.ipynb +++ b/site/en/guide/migrate/sessionrunhook_callback.ipynb @@ -69,7 +69,7 @@ "source": [ "In TensorFlow 1, to customize the behavior of training, you use `tf.estimator.SessionRunHook` with `tf.estimator.Estimator`. This guide demonstrates how to migrate from `SessionRunHook` to TensorFlow 2's custom callbacks with the `tf.keras.callbacks.Callback` API, which works with Keras `Model.fit` for training (as well as `Model.evaluate` and `Model.predict`). 
You will learn how to do this by implementing a `SessionRunHook` and a `Callback` task that measures examples per second during training.\n", "\n", - "Examples of callbacks are checkpoint saving (`tf.keras.callbacks.ModelCheckpoint`) and [TensorBoard](`tf.keras.callbacks.TensorBoard`) summary writing. Keras [callbacks](../../guide/keras/custom_callback.ipynb) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/) and [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section) guides." + "Examples of callbacks are checkpoint saving (`tf.keras.callbacks.ModelCheckpoint`) and [TensorBoard](`tf.keras.callbacks.TensorBoard`) summary writing. Keras [callbacks](https://www.tensorflow.org/guide/keras/custom_callback) are objects that are called at different points during training/evaluation/prediction in the built-in Keras `Model.fit`/`Model.evaluate`/`Model.predict` APIs. You can learn more about callbacks in the `tf.keras.callbacks.Callback` API docs, as well as the [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback.ipynb/) and [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section) guides." ] }, { @@ -246,7 +246,7 @@ "\n", "- API docs: `tf.keras.callbacks.Callback`\n", "- Guide: [Writing your own callbacks](../..guide/keras/custom_callback.ipynb/)\n", - "- Guide: [Training and evaluation with the built-in methods](../../guide/keras/train_and_evaluate.ipynb) (the *Using callbacks* section)\n", + "- Guide: [Training and evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/train_and_evaluate) (the *Using callbacks* section)\n", "\n", "You may also find the following migration-related resources useful:\n", "\n", diff --git a/site/en/guide/migrate/tensorboard.ipynb b/site/en/guide/migrate/tensorboard.ipynb index c8ec222b621..ea0cd72b47e 100644 --- a/site/en/guide/migrate/tensorboard.ipynb +++ b/site/en/guide/migrate/tensorboard.ipynb @@ -218,10 +218,10 @@ "\n", "def create_model():\n", " return tf.keras.models.Sequential([\n", - " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", - " tf.keras.layers.Dense(512, activation='relu'),\n", - " tf.keras.layers.Dropout(0.2),\n", - " tf.keras.layers.Dense(10, activation='softmax')\n", + " tf.keras.layers.Flatten(input_shape=(28, 28), name='layers_flatten'),\n", + " tf.keras.layers.Dense(512, activation='relu', name='layers_dense'),\n", + " tf.keras.layers.Dropout(0.2, name='layers_dropout'),\n", + " tf.keras.layers.Dense(10, activation='softmax', name='layers_dense_2')\n", " ])\n", "\n", "model = create_model()\n", @@ -279,7 +279,6 @@ "colab": { "collapsed_sections": [], "name": "tensorboard.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tf1_vs_tf2.ipynb b/site/en/guide/migrate/tf1_vs_tf2.ipynb index 3fc69a4c7f9..60791f72680 100644 --- a/site/en/guide/migrate/tf1_vs_tf2.ipynb +++ b/site/en/guide/migrate/tf1_vs_tf2.ipynb @@ -131,7 +131,7 @@ "\n", "### Other API changes\n", "\n", - "* TF2 features significant improvements to the device placement algorithms which renders the usage of `tf.colocate_with` unnecessary. 
If removing it causes a performance degrade [please file a bug](https://github.com/tensorflow/tensorflow/issues).\n", + "* TF2 features significant improvements to the device placement algorithms which renders the usage of `tf.colocate_with` unnecessary. If removing it causes a performance degradation, [please file a bug](https://github.com/tensorflow/tensorflow/issues).\n", "\n", "* Replace all usage of `tf.v1.ConfigProto` with equivalent functions from `tf.config`." ] @@ -159,7 +159,7 @@ "source": [ "## No more globals\n", "\n", - "TF1.x relied heavily on implicit global namespaces and collections. When you called `tf.Variable`, it would be put into a collection in the default graph, and it would remain there, even if you lost track of the Python variable pointing to it. You could then recover that `tf.Variable`, but only if you knew the name that it had been created with. This was difficult to do if you were not in control of the variable's creation. As a result, all sorts of mechanisms proliferated to\n", + "TF1.x relied heavily on implicit global namespaces and collections. When you call `tf.Variable`, it would be put into a collection in the default graph, and it would remain there, even if you lost track of the Python variable pointing to it. You could then recover that `tf.Variable`, but only if you knew the name that it had been created with. This was difficult to do if you were not in control of the variable's creation. As a result, all sorts of mechanisms proliferated to\n", "attempt to help you find your variables again, and for frameworks to find\n", "user-created variables. Some of these include: variable scopes, global collections, helper methods like `tf.get_global_step` and `tf.global_variables_initializer`, optimizers implicitly\n", "computing gradients over all trainable variables, and so on. TF2 eliminates all of these mechanisms ([Variables 2.0 RFC](https://github.com/tensorflow/community/pull/11)) in favor of the default mechanism - you keep track of your variables. If you lose track of a `tf.Variable`, it gets garbage collected.\n", @@ -241,15 +241,223 @@ "#### Pattern 1: Python object manipulation and variable creation intended to be done only once get run multiple times\n", "\n", "\n", - "In TF1.x programs that rely on graphs and sessions, the expectation is usually that all Python logic in your program will only run once. However, with eager execution and `tf.function` it is fair to expect that your Python logic will be run at least once, but possibly more times (either multiple times eagerly, or multiple times across different `tf.function` traces). Any Python logic within a `tf.function` will be traced at least twice due to how `tf.function` works. Refer to the `tf.function` [guide](https://www.tensorflow.org/guide/function) for more details.\n", + "In TF1.x programs that rely on graphs and sessions, the expectation is usually that all Python logic in your program will only run once. However, with eager execution and `tf.function` it is fair to expect that your Python logic will be run at least once, but possibly more times (either multiple times eagerly, or multiple times across different `tf.function` traces). Sometimes, `tf.function` will even trace twice on the same input, causing unexpected behaviors (see Example 1 and 2). 
Refer to the `tf.function` [guide](https://www.tensorflow.org/guide/function) for more details.\n", "\n", "Note: This pattern usually causes your code to silently misbehave when executing eagerly without `tf.function`s, but generally raises an `InaccessibleTensorError` or a `ValueError` when attempting to wrap the problematic code inside of a `tf.function`. To discover and debug this issue, it is recommended you wrap your code with `tf.function` early on, and use [pdb](https://docs.python.org/3/library/pdb.html) or interactive debugging to identify the source of the `InaccessibleTensorError`.\n", "\n", "**Example 1: Variable creation**\n", "\n", - "TF1.x code often creates variables without checking that they have already been made (because it runs the Python logic only once at all times). Naively mapping this code to eager execution may cause it to accidentally create new variables in each training step.\n", + "Consider the example below, where the function creates a variable when called:\n", "\n", - "**Example 2: Manipulating a global Python list**\n", + "```python\n", + "def f():\n", + " v = tf.Variable(1.0)\n", + " return v\n", + "\n", + "with tf.Graph().as_default():\n", + " with tf.compat.v1.Session() as sess:\n", + " res = f()\n", + " sess.run(tf.compat.v1.global_variables_initializer())\n", + " sess.run(res)\n", + "```\n", + "\n", + "However, naively wrapping the above function that contains variable creation with `tf.function` is not allowed. `tf.function` only supports [singleton variable creations on the first call](https://www.tensorflow.org/guide/function#creating_tfvariables). To enforce this, when tf.function detects variable creation in the first call, it will attempt to trace again and raise an error if there is variable creation in the second trace.\n", + "\n", + "```python\n", + "@tf.function\n", + "def f():\n", + " print(\"trace\") # This will print twice because the python body is run twice\n", + " v = tf.Variable(1.0)\n", + " return v\n", + "\n", + "try:\n", + " f()\n", + "except ValueError as e:\n", + " print(e)\n", + "```\n", + "\n", + "A workaround is caching and reusing the variable after it is created in the first call.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " print(\"trace\") # This will print twice because the python body is run twice\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + " return self.v\n", + "\n", + "m = Model()\n", + "m()\n", + "```\n", + "\n", + "**Example 2: Out-of-scope Tensors due to `tf.function` retracing**\n", + "\n", + "As demonstrated in Example 1, `tf.function` will retrace when it detects Variable creation in the first call. This can cause extra confusion, because the two tracings will create two graphs. When the second graph from retracing attempts to access a Tensor from the graph generated during the first tracing, Tensorflow will raise an error complaining that the Tensor is out of scope. To demonstrate the scenario, the code below creates a dataset on the first `tf.function` call. 
This would run as expected.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.dataset = None\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " print(\"trace\") # This will print once: only traced once\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + " it = iter(self.dataset)\n", + " return next(it)\n", + "\n", + "m = Model()\n", + "m()\n", + "```\n", + "\n", + "However, if we also attempt to create a variable on the first `tf.function` call, the code will raise an error complaining that the dataset is out of scope. This is because the dataset is in the first graph, while the second graph is also attempting to access it.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + " self.dataset = None\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " print(\"trace\") # This will print twice because the python body is run twice\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + " it = iter(self.dataset)\n", + " return [self.v, next(it)]\n", + "\n", + "m = Model()\n", + "try:\n", + " m()\n", + "except TypeError as e:\n", + " print(e) # is out of scope and cannot be used here.\n", + "```\n", + "\n", + "The most straightforward solution is ensuring that the variable creation and dataset creation are both outside of the `tf.function` call. For example:\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + " self.dataset = None\n", + "\n", + " def initialize(self):\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " it = iter(self.dataset)\n", + " return [self.v, next(it)]\n", + "\n", + "m = Model()\n", + "m.initialize()\n", + "m()\n", + "```\n", + "\n", + "However, sometimes it's unavoidable to create variables in `tf.function` (such as slot variables in some [TF keras optimizers](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer#slots)). Still, we can simply move the dataset creation outside of the `tf.function` call. This works because `tf.function` receives the dataset as an implicit input, so both graphs can access it properly.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.v = None\n", + " self.dataset = None\n", + "\n", + " def initialize(self):\n", + " if self.dataset is None:\n", + " self.dataset = tf.data.Dataset.from_tensors([1, 2, 3])\n", + "\n", + " @tf.function\n", + " def __call__(self):\n", + " if self.v is None:\n", + " self.v = tf.Variable(0)\n", + " it = iter(self.dataset)\n", + " return [self.v, next(it)]\n", + "\n", + "m = Model()\n", + "m.initialize()\n", + "m()\n", + "```\n", + "\n", + "**Example 3: Unexpected TensorFlow object re-creations due to dict usage**\n", + "\n", + "`tf.function` has very poor support for Python side effects such as appending to a list, or checking/adding to a dictionary. More details are in [\"Better performance with tf.function\"](https://www.tensorflow.org/guide/function#executing_python_side_effects). In the example below, the code uses dictionaries to cache datasets and iterators. 
For the same key, each call to the model will return the same iterator of the dataset.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.datasets = {}\n", + " self.iterators = {}\n", + "\n", + " def __call__(self, key):\n", + " if key not in self.datasets:\n", + " self.datasets[key] = tf.compat.v1.data.Dataset.from_tensor_slices([1, 2, 3])\n", + " self.iterators[key] = self.datasets[key].make_initializable_iterator()\n", + " return self.iterators[key]\n", + "\n", + "with tf.Graph().as_default():\n", + " with tf.compat.v1.Session() as sess:\n", + " m = Model()\n", + " it = m('a')\n", + " sess.run(it.initializer)\n", + " for _ in range(3):\n", + " print(sess.run(it.get_next())) # prints 1, 2, 3\n", + "```\n", + "\n", + "However, the pattern above will not work as expected in `tf.function`. During tracing, `tf.function` will ignore the Python side effect of adding to the dictionaries. Instead, it only remembers the creation of a new dataset and iterator. As a result, each call to the model will always return a new iterator. This issue is hard to notice unless the difference in numerical results or performance is significant. Hence, we recommend that you review the code carefully before naively wrapping it in `tf.function`.\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.datasets = {}\n", + " self.iterators = {}\n", + "\n", + " @tf.function\n", + " def __call__(self, key):\n", + " if key not in self.datasets:\n", + " self.datasets[key] = tf.data.Dataset.from_tensor_slices([1, 2, 3])\n", + " self.iterators[key] = iter(self.datasets[key])\n", + " return self.iterators[key]\n", + "\n", + "m = Model()\n", + "for _ in range(3):\n", + " print(next(m('a'))) # prints 1, 1, 1\n", + "```\n", + "\n", + "We can use [`tf.init_scope`](https://www.tensorflow.org/api_docs/python/tf/init_scope) to lift the dataset and iterator creation outside of the graph, to achieve the expected behavior:\n", + "\n", + "```python\n", + "class Model(tf.Module):\n", + " def __init__(self):\n", + " self.datasets = {}\n", + " self.iterators = {}\n", + "\n", + " @tf.function\n", + " def __call__(self, key):\n", + " if key not in self.datasets:\n", + " # Lifts ops out of function-building graphs\n", + " with tf.init_scope():\n", + " self.datasets[key] = tf.data.Dataset.from_tensor_slices([1, 2, 3])\n", + " self.iterators[key] = iter(self.datasets[key])\n", + " return self.iterators[key]\n", + "\n", + "m = Model()\n", + "for _ in range(3):\n", + " print(next(m('a'))) # prints 1, 2, 3\n", + "```\n", + "\n", + "The general rule of thumb is to avoid relying on Python side effects in your logic and only use them to debug your traces.\n", + "\n", + "**Example 4: Manipulating a global Python list**\n", "\n", "The following TF1.x code uses a global list of losses that it uses to only maintain the list of losses generated by the current training step. Note that the Python logic that appends losses to the list will only be called once regardless of how many training steps the session is run for.\n", "\n", @@ -473,11 +681,11 @@ "source": [ "### `ResourceVariables` instead of `ReferenceVariables`\n", "\n", - "`ResourceVariables` have stronger read-write consistency guarantees than `ReferenceVariables`. This leads to more predictable, easier-to-reason about semantics about whether or not you will observe the result of a previous write when using your variables. 
This change is extremely unlikely to cause existing code to raise errors or to break silently.\n", + "`ResourceVariables` have stronger read-write consistency guarantees than `ReferenceVariables`. This leads to more predictable, easier-to-reason semantics about whether or not you will observe the result of a previous write when using your variables. This change is extremely unlikely to cause existing code to raise errors or to break silently.\n", "\n", "However, it is ***possible though unlikely*** that these stronger consistency guarantees may increase the memory usage of your specific program. Please file an [issue](https://github.com/tensorflow/tensorflow/issues) if you find this to be the case. Additionally, if you have unit tests relying on exact string comparisons against the operator names in a graph corresponding to variable reads, be aware that enabling resource variables may slightly change the name of these operators.\n", "\n", - "To isolate the impact of this behavior change on your code, if eager execution is disabled you can use `tf.compat.v1.disable_resource_variables()` and `tf.compat.v1.enable_resource_variables()` to globally disable or enable this behavior change. `ResourceVariables` will always be used if eager execution is enabled. You can also \n" + "To isolate the impact of this behavior change on your code, if eager execution is disabled you can use `tf.compat.v1.disable_resource_variables()` and `tf.compat.v1.enable_resource_variables()` to globally disable or enable this behavior change. `ResourceVariables` will always be used if eager execution is enabled.\n" ] }, { @@ -819,28 +1027,12 @@ }, "source": [ "### Hashing tensors and variables\n", - "With TF1.x behaviors you used to be able to directly add variables and tensors to data structures that require hashing, such as `set` and `dict` keys." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fxi7EgKdBpd5" - }, - "outputs": [], - "source": [ - "tf.compat.v1.disable_tensor_equality()\n", + "With TF1.x behaviors you used to be able to directly add variables and tensors to data structures that require hashing, such as `set` and `dict` keys.\n", + "```python\n", "x = tf.Variable(0.0)\n", - "set([x, tf.constant(2.0)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LFgjR-PSajVQ" - }, - "source": [ + "set([x, tf.constant(2.0)])\n", + "```\n", + "\n", "However, in TF2 with tensor equality enabled, tensors and variables are made unhashable due to the `==` and `!=` operator semantics changing to value equality checks." ] }, @@ -925,11 +1117,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [ - "Tce3stUlHN0L" - ], + "collapsed_sections": [], "name": "tf1_vs_tf2.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tflite.ipynb b/site/en/guide/migrate/tflite.ipynb index fd3695e5434..0426655ee1a 100644 --- a/site/en/guide/migrate/tflite.ipynb +++ b/site/en/guide/migrate/tflite.ipynb @@ -67,14 +67,14 @@ "id": "meUTrR4I6m1C" }, "source": [ - "[TensorFlow Lite](https://www.tensorflow.org/lite/guide) (TFLite) is a set of tools that helps developers run ML inference on-device (mobile, embedded, and IoT devices). 
The [TFLite converter](https://www.tensorflow.org/lite/convert) is one such tool that converts exisiting TF models into an optimized TFLite model format that can be efficiently run on-device.\n", + "[TensorFlow Lite](https://www.tensorflow.org/lite/guide) (TFLite) is a set of tools that helps developers run ML inference on-device (mobile, embedded, and IoT devices). The [TFLite converter](https://www.tensorflow.org/lite/convert) is one such tool that converts existing TF models into an optimized TFLite model format that can be efficiently run on-device.\n", "\n", "In this doc, you'll learn what changes you need to make to your TF to TFLite conversion code, followed by a few examples that do the same.\n", "\n", "\n", "## Changes to your TF to TFLite conversion code\n", "\n", - "* If you're using a legacy TF1 model format (Keras file, frozen GraphDef, checkpoints, tf.Session, etc), update it to TF1/TF2 SavedModel and use the TF2 converter API `tf.lite.TFLiteConverter.from_saved_model(...)` to convert it to a TFLite model (refer to Table 1).\n", + "* If you're using a legacy TF1 model format (such as Keras file, frozen GraphDef, checkpoints, tf.Session), update it to TF1/TF2 SavedModel and use the TF2 converter API `tf.lite.TFLiteConverter.from_saved_model(...)` to convert it to a TFLite model (refer to Table 1).\n", "\n", "* Update the converter API flags (refer to Table 2).\n", "* Remove legacy APIs such as `tf.lite.constants`. (eg: Replace `tf.lite.constants.INT8` with `tf.int8`)\n", @@ -125,7 +125,7 @@ "source": [ "## Examples\n", "\n", - "You'll now walkthrough some examples to convert legacy TF1 models to TF1/TF2 SavedModels and then convert it to TF2 TFLite models.\n", + "You'll now walk through some examples to convert legacy TF1 models to TF1/TF2 SavedModels and then convert them to TF2 TFLite models.\n", "\n", "### Setup\n", "\n", @@ -400,7 +400,7 @@ "with tf.Graph().as_default() as g:\n", " tf.graph_util.import_graph_def(gdef, name=\"\")\n", "\n", - "# Lookup the input and output tensors.\n", + "# Look up the input and output tensors.\n", "input_tensor = g.get_tensor_by_name('input:0') \n", "output_tensor = g.get_tensor_by_name('MobilenetV1/Predictions/Softmax:0')\n", "\n", @@ -436,7 +436,6 @@ "colab": { "collapsed_sections": [], "name": "tflite.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tpu_embedding.ipynb b/site/en/guide/migrate/tpu_embedding.ipynb index e912e868df2..44105ea984b 100644 --- a/site/en/guide/migrate/tpu_embedding.ipynb +++ b/site/en/guide/migrate/tpu_embedding.ipynb @@ -476,7 +476,10 @@ "source": [ "strategy = tf.distribute.TPUStrategy(cluster_resolver)\n", "with strategy.scope():\n", - " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", + " if hasattr(tf.keras.optimizers, \"legacy\"):\n", + " optimizer = tf.keras.optimizers.legacy.Adagrad(learning_rate=0.05)\n", + " else:\n", + " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", " dense_input = tf.keras.Input(shape=(2,), dtype=tf.float32, batch_size=global_batch_size)\n", " sparse_input = tf.keras.Input(shape=(), dtype=tf.int32, batch_size=global_batch_size)\n", " embedded_input = tfrs.layers.embedding.TPUEmbedding(\n", @@ -550,7 +553,7 @@ "source": [ "Learn more about setting up TPU-specific embeddings in the API docs:\n", "\n", - "- `tfrs.layers.embedding.TPUEmbedding`: particularly about feature and table configuration, setting the optimizer, creating a model (using the Keras [functional](../../guide/keras/functional.ipynb) 
API or via [subclassing](../..guide/keras/custom_layers_and_models.ipynb) `tf.keras.Model`), training/evaluation, and model serving with `tf.saved_model`\n", + "- `tfrs.layers.embedding.TPUEmbedding`: particularly about feature and table configuration, setting the optimizer, creating a model (using the Keras [functional](https://www.tensorflow.org/guide/keras/functional) API or via [subclassing](../..guide/keras/custom_layers_and_models.ipynb) `tf.keras.Model`), training/evaluation, and model serving with `tf.saved_model`\n", "- `tf.tpu.experimental.embedding.TableConfig`\n", "- `tf.tpu.experimental.embedding.FeatureConfig`\n", "\n", @@ -563,7 +566,7 @@ "To learn more about customizing your training, refer to:\n", "\n", "- Guide: [Customize what happens in Model.fit](../..guide/keras/customizing_what_happens_in_fit.ipynb)\n", - "- Guide: [Writing a training loop from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb)\n", + "- Guide: [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch)\n", "\n", "TPUs—Google's specialized ASICs for machine learning—are available through [Google Colab](https://colab.research.google.com/), the [TPU Research Cloud](https://sites.research.google/trc/), and [Cloud TPU](https://cloud.google.com/tpu)." ] diff --git a/site/en/guide/migrate/tpu_estimator.ipynb b/site/en/guide/migrate/tpu_estimator.ipynb index 6aa05aed84a..9cc35dc8bae 100644 --- a/site/en/guide/migrate/tpu_estimator.ipynb +++ b/site/en/guide/migrate/tpu_estimator.ipynb @@ -369,7 +369,7 @@ "\n", "To learn more about customizing your training, refer to:\n", "- Guide: [Customize what happens in Model.fit](../..guide/keras/customizing_what_happens_in_fit.ipynb)\n", - "- Guide: [Writing a training loop from scratch](../../guide/keras/writing_a_training_loop_from_scratch.ipynb)\n", + "- Guide: [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch)\n", "\n", "TPUs—Google's specialized ASICs for machine learning—are available through [Google Colab](https://colab.research.google.com/), the [TPU Research Cloud](https://sites.research.google/trc/), and [Cloud TPU](https://cloud.google.com/tpu)." ] diff --git a/site/en/guide/migrate/upgrade.ipynb b/site/en/guide/migrate/upgrade.ipynb index c29b8edec88..7223a8c8c81 100644 --- a/site/en/guide/migrate/upgrade.ipynb +++ b/site/en/guide/migrate/upgrade.ipynb @@ -95,7 +95,7 @@ "source": [ "## Compatibility modules\n", "\n", - "Certain API symbols can not be upgraded simply by using a string replacement. Those that cannot be automatically upgraded will be mapped to their locations in the `compat.v1` module. This module replaces TF 1.x symbols like `tf.foo` with the equivalent `tf.compat.v1.foo` reference. If you are already using `compat.v1` APIs by importing TF via `import tensorflow.compat.v1 as tf`, the `tf_upgrade_v2` script will attempt to convert these usages to the non-compat APIs where possible. Note that while some `compat.v1` APIs are compatible with TF2.x behaviors, many are not. So, we recommend that you manually proofread replacements and migrate them to new APIs in the `tf.*` namespace instead of `tf.compat.v1` namespace as quickly as possible.\n", + "Certain API symbols can not be upgraded simply by using a string replacement. Those that cannot be automatically upgraded will be mapped to their locations in the `compat.v1` module. 
This module replaces TF 1.x symbols like `tf.foo` with the equivalent `tf.compat.v1.foo` reference. If you are already using `compat.v1` APIs by importing TF via `import tensorflow.compat.v1 as tf`, the `tf_upgrade_v2` script will attempt to convert these usages to the non-compat APIs where possible. Note that while some `compat.v1` APIs are compatible with TF2.x behaviors, many are not. Therefore, it's recommended to manually proofread replacements and migrate them to new APIs in the `tf.*` namespace instead of `tf.compat.v1` namespace as quickly as possible.\n", "\n", "Because of TensorFlow 2.x module deprecations (for example, `tf.flags` and `tf.contrib`), some changes can not be worked around by switching to `compat.v1`. Upgrading this code may require using an additional library (for example, [`absl.flags`](https://github.com/abseil/abseil-py)) or switching to a package in [tensorflow/addons](http://www.github.com/tensorflow/addons).\n" ] @@ -108,7 +108,7 @@ "source": [ "## Recommended upgrade process\n", "\n", - "The rest of this guide demonstrates how to use the symbol-rewriting script. While the script is easy to use, it is strongly recomended that you use the script as part of the following process: \n", + "The rest of this guide demonstrates how to use the symbol-rewriting script. While the script is easy to use, it is strongly recommended that you use the script as part of the following process: \n", "\n", "1. **Unit Test**: Ensure that the code you’re upgrading has a unit test suite with reasonable coverage. This is Python code, so the language won’t protect you from many classes of mistakes. Also ensure that any dependency you have has already been upgraded to be compatible with TensorFlow 2.x.\n", "\n", @@ -574,13 +574,13 @@ "source": [ "## Caveats\n", "\n", - "- Do not update parts of your code manually before running this script. In particular, functions that have had reordered arguments like `tf.argmax` or `tf.batch_to_space` cause the script to incorrectly add keyword arguments that mismap your existing code.\n", + "- Do not update parts of your code manually before running this script. In particular, functions that have had reordered arguments like `tf.math.argmax` or `tf.batch_to_space` cause the script to incorrectly add keyword arguments that mismap your existing code.\n", "\n", "- The script assumes that `tensorflow` is imported using `import tensorflow as tf`, or `import tensorflow.compat.v1 as tf`.\n", "\n", "- This script does not reorder arguments. Instead, the script adds keyword arguments to functions that have their arguments reordered.\n", "\n", - "- Check out [tf2up.ml](http://tf2up.ml) for a convenient tool to upgrade Jupyter\n", + "- Check out [tf2up.ml](https://github.com/lc0/tf2up) for a convenient tool to upgrade Jupyter\n", " notebooks and Python files in a GitHub repository.\n", "\n", "To report upgrade script bugs or make feature requests, please file an issue on [GitHub](https://github.com/tensorflow/tensorflow/issues)." 
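The caveats above all concern the `tf_upgrade_v2` command-line script that ships with the `tensorflow` pip package. As a rough illustration of how it is typically invoked from a notebook or shell (the file and directory names below are hypothetical placeholders), a single file or a whole source tree can be converted like this:

```shell
# Upgrade a single file (hypothetical paths).
!tf_upgrade_v2 --infile my_model.py --outfile my_model_v2.py

# Upgrade a whole directory tree and write a report of every change it made.
!tf_upgrade_v2 \
  --intree my_project/ \
  --outtree my_project_v2/ \
  --reportfile upgrade_report.txt
```

Reviewing the generated report file before committing the output is a good way to follow the "manually proofread replacements" advice above.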
@@ -600,4 +600,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/site/en/guide/migrate/validate_correctness.ipynb b/site/en/guide/migrate/validate_correctness.ipynb index 803965a2252..a0555cdd55c 100644 --- a/site/en/guide/migrate/validate_correctness.ipynb +++ b/site/en/guide/migrate/validate_correctness.ipynb @@ -105,8 +105,8 @@ }, "outputs": [], "source": [ - "# Install tf-nightly as the model mapping shim is available only in\n", - "# TensorFlow 2.7\n", + "# Install tf-nightly as the DeterministicRandomTestTool is available only in\n", + "# Tensorflow 2.8\n", "!pip install -q tf-nightly" ] }, @@ -136,7 +136,6 @@ "import tf_slim as slim\n", "import sys\n", "\n", - "from unittest import mock\n", "\n", "from contextlib import contextmanager" ] @@ -686,7 +685,7 @@ "id": "BQbb8Hyk5YVi" }, "source": [ - "The following `DeterministicTestTool` object provides a context manager `scope()` that can make stateful random operations use the same seed across both TF1 graphs/sessions and eager execution,\n", + "The following [`v1.keras.utils.DeterministicRandomTestTool`](https://www.tensorflow.org/api_docs/python/tf/compat/v1/keras/utils/DeterministicRandomTestTool) class provides a context manager `scope()` that can make stateful random operations use the same seed across both TF1 graphs/sessions and eager execution.\n", "\n", "The tool provides two testing modes: \n", "1. `constant` which uses the same seed for every single operation no matter how many times it has been called and,\n", @@ -695,67 +694,6 @@ "This applies both to the stateful random operations used for creating and initializing variables, and to the stateful random operations used in computation (such as for dropout layers)." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VRTg0bQlcPeP" - }, - "outputs": [], - "source": [ - "seed_implementation = sys.modules[tf.compat.v1.get_seed.__module__]\n", - "\n", - "class DeterministicTestTool(object):\n", - " def __init__(self, seed: int = 42, mode='constant'):\n", - " \"\"\"Set mode to 'constant' or 'num_random_ops'. Defaults to 'constant'.\"\"\"\n", - " if mode not in {'constant', 'num_random_ops'}:\n", - " raise ValueError(\"Mode arg must be 'constant' or 'num_random_ops'. \" +\n", - " \"Got: {}\".format(mode))\n", - "\n", - " self._mode = mode\n", - " self._seed = seed\n", - " self.operation_seed = 0\n", - " self._observed_seeds = set()\n", - "\n", - " def scope(self):\n", - " tf.random.set_seed(self._seed)\n", - "\n", - " def _get_seed(_):\n", - " \"\"\"Wraps TF get_seed to make deterministic random generation easier.\n", - "\n", - " This makes a variable's initialization (and calls that involve random\n", - " number generation) depend only on how many random number generations\n", - " were used in the scope so far, rather than on how many unrelated\n", - " operations the graph contains.\n", - "\n", - " Returns:\n", - " Random seed tuple.\n", - " \"\"\"\n", - " op_seed = self.operation_seed\n", - " if self._mode == \"constant\":\n", - " tf.random.set_seed(op_seed)\n", - " else:\n", - " if op_seed in self._observed_seeds:\n", - " raise ValueError(\n", - " 'This `DeterministicTestTool` object is trying to re-use the ' +\n", - " 'already-used operation seed {}. '.format(op_seed) +\n", - " 'It cannot guarantee random numbers will match between eager ' +\n", - " 'and sessions when an operation seed is reused. 
' +\n", - " 'You most likely set ' +\n", - " '`operation_seed` explicitly but used a value that caused the ' +\n", - " 'naturally-incrementing operation seed sequences to overlap ' +\n", - " 'with an already-used seed.')\n", - "\n", - " self._observed_seeds.add(op_seed)\n", - " self.operation_seed += 1\n", - "\n", - " return (self._seed, op_seed)\n", - "\n", - " # mock.patch internal symbols to modify the behavior of TF APIs relying on them\n", - "\n", - " return mock.patch.object(seed_implementation, 'get_seed', wraps=_get_seed)" - ] - }, { "cell_type": "markdown", "metadata": { @@ -773,7 +711,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -796,7 +734,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " a = tf.random.uniform(shape=(3,1))\n", " a = a * 3\n", @@ -860,7 +798,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -883,7 +821,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " a = tf.random.uniform(shape=(3,1))\n", " a = a * 3\n", @@ -940,14 +878,14 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " a = tf.random.uniform(shape=(3,1))\n", " a = a * 3\n", " b = tf.random.uniform(shape=(3,3))\n", " b = b * 3\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " b_prime = tf.random.uniform(shape=(3,3))\n", " b_prime = b_prime * 3\n", @@ -964,7 +902,7 @@ "id": "nHhOLHyQIkAe" }, "source": [ - "To allow for debugging variations due to tracing order, `DeterministicTestTool` in `num_random_ops` mode allows you to see how many random operations have been traced with the `operation_seed` property." + "To allow for debugging variations due to tracing order, `DeterministicRandomTestTool` in `num_random_ops` mode allows you to see how many random operations have been traced with the `operation_seed` property." 
] }, { @@ -975,7 +913,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " print(random_tool.operation_seed)\n", " a = tf.random.uniform(shape=(3,1))\n", @@ -1003,7 +941,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " print(random_tool.operation_seed)\n", " a = tf.random.uniform(shape=(3,1))\n", @@ -1012,7 +950,7 @@ " b = tf.random.uniform(shape=(3,3))\n", " b = b * 3\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " random_tool.operation_seed = 1\n", " b_prime = tf.random.uniform(shape=(3,3))\n", @@ -1031,7 +969,7 @@ "id": "bP5Kx1OcNbvM" }, "source": [ - "However, `DeterministicTestTool` disallows reusing already-used operation seeds, so make sure the auto-incremented sequences cannot overlap. This is because eager execution generates different numbers for follow-on usages of the same operation seed while TF1 graphs and sessions do not, so raising an error helps keep session and eager stateful random number generation in line." + "However, `DeterministicRandomTestTool` disallows reusing already-used operation seeds, so make sure the auto-incremented sequences cannot overlap. This is because eager execution generates different numbers for follow-on usages of the same operation seed while TF1 graphs and sessions do not, so raising an error helps keep session and eager stateful random number generation in line." ] }, { @@ -1042,7 +980,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " random_tool.operation_seed = 1\n", " b_prime = tf.random.uniform(shape=(3,3))\n", @@ -1067,7 +1005,7 @@ "source": [ "### Verifying Inference\n", "\n", - "You can now use the `DeterministicTestTool` to make sure the `InceptionResnetV2` model matches in inference, even when using the random weight initialization. For a stronger test condition due to matching program order, use the `num_random_ops` mode." + "You can now use the `DeterministicRandomTestTool` to make sure the `InceptionResnetV2` model matches in inference, even when using the random weight initialization. For a stronger test condition due to matching program order, use the `num_random_ops` mode." 
] }, { @@ -1078,7 +1016,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -1110,7 +1048,7 @@ "height, width = 299, 299\n", "num_classes = 1000\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " model = InceptionResnetV2(num_classes)\n", "\n", @@ -1132,7 +1070,7 @@ "outputs": [], "source": [ "# Verify that the regularization loss and output both match\n", - "# when using the DeterministicTestTool:\n", + "# when using the DeterministicRandomTestTool:\n", "np.testing.assert_allclose(tf1_regularization_loss, tf2_regularization_loss.numpy(), **tol_dict)\n", "np.testing.assert_allclose(tf1_output, tf2_output.numpy(), **tol_dict)" ] @@ -1145,7 +1083,7 @@ "source": [ "### Verifying Training\n", "\n", - "Because `DeterministicTestTool` works for *all* stateful random operations (including both weight initialization and computation such as dropout layers), you can use it to verify the models match in training mode as well. You can again use the `num_random_ops` mode because the program order of the stateful random ops matches." + "Because `DeterministicRandomTestTool` works for *all* stateful random operations (including both weight initialization and computation such as dropout layers), you can use it to verify the models match in training mode as well. You can again use the `num_random_ops` mode because the program order of the stateful random ops matches." ] }, { @@ -1156,7 +1094,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -1188,7 +1126,7 @@ "height, width = 299, 299\n", "num_classes = 1000\n", "\n", - "random_tool = DeterministicTestTool(mode='num_random_ops')\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", " model = InceptionResnetV2(num_classes)\n", "\n", @@ -1210,7 +1148,7 @@ "outputs": [], "source": [ "# Verify that the regularization loss and output both match\n", - "# when using the DeterministicTestTool\n", + "# when using the DeterministicRandomTestTool\n", "np.testing.assert_allclose(tf1_regularization_loss, tf2_regularization_loss.numpy(), **tol_dict)\n", "np.testing.assert_allclose(tf1_output, tf2_output.numpy(), **tol_dict)" ] @@ -1230,7 +1168,7 @@ "id": "xpOAei5vRAPa" }, "source": [ - "Note: When using the `DeterministicTestTool` in `num_random_ops` mode, it is suggested you directly use and call the `tf.keras.layers.Layer` method decorator when testing for numerical equivalence. Embedding it within a Keras functional model or other Keras models can produce differences in stateful random operation tracing order that can be tricky to reason about or match exactly when comparing TF1.x graphs/sessions and eager execution. \n", + "Note: When using the `DeterministicRandomTestTool` in `num_random_ops` mode, it is suggested you directly use and call the `tf.keras.layers.Layer` method decorator when testing for numerical equivalence. 
Embedding it within a Keras functional model or other Keras models can produce differences in stateful random operation tracing order that can be tricky to reason about or match exactly when comparing TF1.x graphs/sessions and eager execution. \n", "\n", "For example, calling the `InceptionResnetV2` layer directly with `training=True` interleaves variable initialization with the dropout order according to the network creation order.\n", "\n", @@ -1247,7 +1185,7 @@ }, "outputs": [], "source": [ - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " graph = tf.Graph()\n", " with graph.as_default(), tf.compat.v1.Session(graph=graph) as sess:\n", @@ -1279,7 +1217,7 @@ "height, width = 299, 299\n", "num_classes = 1000\n", "\n", - "random_tool = DeterministicTestTool()\n", + "random_tool = v1.keras.utils.DeterministicRandomTestTool()\n", "with random_tool.scope():\n", " keras_input = tf.keras.Input(shape=(height, width, 3))\n", " layer = InceptionResnetV2(num_classes)\n", @@ -1303,7 +1241,7 @@ "outputs": [], "source": [ "# Verify that the regularization loss and output both match\n", - "# when using the DeterministicTestTool\n", + "# when using the DeterministicRandomTestTool\n", "np.testing.assert_allclose(tf1_regularization_loss, tf2_regularization_loss.numpy(), **tol_dict)\n", "np.testing.assert_allclose(tf1_output, tf2_output.numpy(), **tol_dict)" ] @@ -1316,7 +1254,7 @@ "source": [ "## Step 3b or 4b (optional): Testing with pre-existing checkpoints\n", "\n", - "After step 3 or step 4 above, it can be useful to run your numerical equivalence tests when starting from pre-existing name-based checkpoints if you have some. This can test both that your legacy checkpoint loading is working correctly and that the model itself is working right. The [Reusing TF1.x checkpoints guide](./reuse_checkpoints.ipynb) covers how to reuse your pre-existing TF1.x checkpoints and transfer them over to TF2 checkpoints.\n" + "After step 3 or step 4 above, it can be useful to run your numerical equivalence tests when starting from pre-existing name-based checkpoints if you have some. This can test both that your legacy checkpoint loading is working correctly and that the model itself is working right. The [Reusing TF1.x checkpoints guide](./migrating_checkpoints.ipynb) covers how to reuse your pre-existing TF1.x checkpoints and transfer them over to TF2 checkpoints.\n" ] }, { @@ -1341,7 +1279,6 @@ "colab": { "collapsed_sections": [], "name": "validate_correctness.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/mixed_precision.ipynb b/site/en/guide/mixed_precision.ipynb index 984f1e8abc6..a19d6f254f3 100644 --- a/site/en/guide/mixed_precision.ipynb +++ b/site/en/guide/mixed_precision.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "Mixed precision is the use of both 16-bit and 32-bit floating-point types in a model during training to make it run faster and use less memory. By keeping certain parts of the model in the 32-bit types for numeric stability, the model will have a lower step time and train equally as well in terms of the evaluation metrics such as accuracy. This guide describes how to use the Keras mixed precision API to speed up your models. Using this API can improve performance by more than 3 times on modern GPUs and 60% on TPUs." 
+ "Mixed precision is the use of both 16-bit and 32-bit floating-point types in a model during training to make it run faster and use less memory. By keeping certain parts of the model in the 32-bit types for numeric stability, the model will have a lower step time and train equally as well in terms of the evaluation metrics such as accuracy. This guide describes how to use the Keras mixed precision API to speed up your models. Using this API can improve performance by more than 3 times on modern GPUs, 60% on TPUs and more than 2 times on latest Intel CPUs." ] }, { @@ -81,7 +81,7 @@ "source": [ "Today, most models use the float32 dtype, which takes 32 bits of memory. However, there are two lower-precision dtypes, float16 and bfloat16, each which take 16 bits of memory instead. Modern accelerators can run operations faster in the 16-bit dtypes, as they have specialized hardware to run 16-bit computations and 16-bit dtypes can be read from memory faster.\n", "\n", - "NVIDIA GPUs can run operations in float16 faster than in float32, and TPUs can run operations in bfloat16 faster than float32. Therefore, these lower-precision dtypes should be used whenever possible on those devices. However, variables and a few computations should still be in float32 for numeric reasons so that the model trains to the same quality. The Keras mixed precision API allows you to use a mix of either float16 or bfloat16 with float32, to get the performance benefits from float16/bfloat16 and the numeric stability benefits from float32.\n", + "NVIDIA GPUs can run operations in float16 faster than in float32, and TPUs and supporting Intel CPUs can run operations in bfloat16 faster than float32. Therefore, these lower-precision dtypes should be used whenever possible on those devices. However, variables and a few computations should still be in float32 for numeric reasons so that the model trains to the same quality. The Keras mixed precision API allows you to use a mix of either float16 or bfloat16 with float32, to get the performance benefits from float16/bfloat16 and the numeric stability benefits from float32.\n", "\n", "Note: In this guide, the term \"numeric stability\" refers to how a model's quality is affected by the use of a lower-precision dtype instead of a higher precision dtype. An operation is \"numerically unstable\" in float16 or bfloat16 if running it in one of those dtypes causes the model to have worse evaluation accuracy or other metrics compared to running the operation in float32." ] @@ -118,9 +118,11 @@ "source": [ "## Supported hardware\n", "\n", - "While mixed precision will run on most hardware, it will only speed up models on recent NVIDIA GPUs and Cloud TPUs. NVIDIA GPUs support using a mix of float16 and float32, while TPUs support a mix of bfloat16 and float32.\n", + "While mixed precision will run on most hardware, it will only speed up models on recent NVIDIA GPUs, Cloud TPUs and recent Intel CPUs. NVIDIA GPUs support using a mix of float16 and float32, while TPUs and Intel CPUs support a mix of bfloat16 and float32.\n", "\n", - "Among NVIDIA GPUs, those with compute capability 7.0 or higher will see the greatest performance benefit from mixed precision because they have special hardware units, called Tensor Cores, to accelerate float16 matrix multiplications and convolutions. Older GPUs offer no math performance benefit for using mixed precision, however memory and bandwidth savings can enable some speedups. 
You can look up the compute capability for your GPU at NVIDIA's [CUDA GPU web page](https://developer.nvidia.com/cuda-gpus). Examples of GPUs that will benefit most from mixed precision include RTX GPUs, the V100, and the A100." + "Among NVIDIA GPUs, those with compute capability 7.0 or higher will see the greatest performance benefit from mixed precision because they have special hardware units, called Tensor Cores, to accelerate float16 matrix multiplications and convolutions. Older GPUs offer no math performance benefit for using mixed precision, however memory and bandwidth savings can enable some speedups. You can look up the compute capability for your GPU at NVIDIA's [CUDA GPU web page](https://developer.nvidia.com/cuda-gpus). Examples of GPUs that will benefit most from mixed precision include RTX GPUs, the V100, and the A100.\n", + "\n", + "Among Intel CPUs, those starting with the 4th Gen Intel Xeon Processors (code name Sapphire Rapids) will see the greatest performance benefit from mixed precision, as they can accelerate bfloat16 computations using AMX instructions (requires TensorFlow 2.12 or later)." ] }, { "cell_type": "markdown", "metadata": { "id": "-q2hisD60F0_" }, "source": [ - "Note: If running this guide in Google Colab, the GPU runtime typically has a P100 connected. The P100 has compute capability 6.0 and is not expected to show a significant speedup.\n", + "Note: If running this guide in Google Colab, the GPU runtime typically has a P100 connected. The P100 has compute capability 6.0 and is not expected to show a significant speedup. If running on a CPU runtime, there may be a slowdown as the runtime likely has a CPU without AMX.\n", "\n", "You can check your GPU type with the following. The command only exists if the\n", "NVIDIA drivers are installed, so the following will raise an error otherwise." @@ -154,7 +156,7 @@ "source": [ "All Cloud TPUs support bfloat16.\n", "\n", - "Even on CPUs and older GPUs, where no speedup is expected, mixed precision APIs can still be used for unit testing, debugging, or just to try out the API. On CPUs, mixed precision will run significantly slower, however." + "Even on older Intel CPUs, other x86 CPUs without AMX, and older GPUs, where no speedup is expected, mixed precision APIs can still be used for unit testing, debugging, or just to try out the API. However, mixed_bfloat16 on CPUs without AMX instructions and mixed_float16 on all x86 CPUs will run significantly slower." ] }, { @@ -235,7 +237,7 @@ "id": "MOFEcna28o4T" }, "source": [ - "As mentioned before, the `mixed_float16` policy will most significantly improve performance on NVIDIA GPUs with compute capability of at least 7.0. The policy will run on other GPUs and CPUs but may not improve performance. For TPUs, the `mixed_bfloat16` policy should be used instead." + "As mentioned before, the `mixed_float16` policy will most significantly improve performance on NVIDIA GPUs with compute capability of at least 7.0. The policy will run on other GPUs and CPUs but may not improve performance. For TPUs and CPUs, the `mixed_bfloat16` policy should be used instead." ] }, { @@ -411,7 +413,7 @@ "id": "0Sm8FJHegVRN" }, "source": [ - "This example cast the input data from int8 to float32. You don't cast to float16 since the division by 255 is on the CPU, which runs float16 operations slower than float32 operations. In this case, the performance difference in negligible, but in general you should run input processing math in float32 if it runs on the CPU. 
The first layer of the model will cast the inputs to float16, as each layer casts floating-point inputs to its compute dtype.\n", + "This example casts the input data from int8 to float32. You don't cast to float16 since the division by 255 is on the CPU, which runs float16 operations slower than float32 operations. In this case, the performance difference is negligible, but in general you should run input processing math in float32 if it runs on the CPU. The first layer of the model will cast the inputs to float16, as each layer casts floating-point inputs to its compute dtype.\n", "\n", "The initial weights of the model are retrieved. This will allow training from scratch again by loading the weights." ] @@ -465,7 +467,7 @@ " \n", "If you are running this guide in Colab, you can compare the performance of mixed precision with float32. To do so, change the policy from `mixed_float16` to `float32` in the \"Setting the dtype policy\" section, then rerun all the cells up to this point. On GPUs with compute capability 7.X, you should see the time per step significantly increase, indicating mixed precision sped up the model. Make sure to change the policy back to `mixed_float16` and rerun the cells before continuing with the guide.\n", "\n", - "On GPUs with compute capability of at least 8.0 (Ampere GPUs and above), you likely will see no performance improvement in the toy model in this guide when using mixed precision compared to float32. This is due to the use of [TensorFloat-32](https://www.tensorflow.org/api_docs/python/tf/config/experimental/enable_tensor_float_32_execution), which automatically uses lower precision math in certain float32 ops such as `tf.linalg.matmul`. TensorFloat-32 gives some of the performance advantages of mixed precision when using float32. However, in real-world models, you will still typically see significantly performance improvements from mixed precision due to memory bandwidth savings and ops which TensorFloat-32 does not support.\n", + "On GPUs with compute capability of at least 8.0 (Ampere GPUs and above), you likely will see no performance improvement in the toy model in this guide when using mixed precision compared to float32. This is due to the use of [TensorFloat-32](https://www.tensorflow.org/api_docs/python/tf/config/experimental/enable_tensor_float_32_execution), which automatically uses lower precision math in certain float32 ops such as `tf.linalg.matmul`. TensorFloat-32 gives some of the performance advantages of mixed precision when using float32. However, in real-world models, you will still typically experience significant performance improvements from mixed precision due to memory bandwidth savings and ops which TensorFloat-32 does not support.\n", "\n", "If running mixed precision on a TPU, you will not see as much of a performance gain compared to running mixed precision on GPUs, especially pre-Ampere GPUs. This is because TPUs do certain ops in bfloat16 under the hood even with the default dtype policy of float32. This is similar to how Ampere GPUs use TensorFloat-32 by default. Compared to Ampere GPUs, TPUs typically see less performance gains with mixed precision on real-world models.\n", "\n", @@ -480,7 +482,9 @@ "source": [ "## Loss scaling\n", "\n", - "Loss scaling is a technique which `tf.keras.Model.fit` automatically performs with the `mixed_float16` policy to avoid numeric underflow. This section describes what loss scaling is and the next section describes how to use it with a custom training loop." 
+ "Loss scaling is a technique which `tf.keras.Model.fit` automatically performs with the `mixed_float16` policy to avoid numeric underflow. This section describes what loss scaling is and the next section describes how to use it with a custom training loop.\n", + "\n", + "Note: When using `mixed_bfloat16` policy, there is no need to do loss scaling." ] }, { @@ -612,7 +616,7 @@ "id": "FVy5gnBqTE9z" }, "source": [ - "If you want, it is possible choose an explicit loss scale or otherwise customize the loss scaling behavior, but it is highly recommended to keep the default loss scaling behavior, as it has been found to work well on all known models. See the `tf.keras.mixed_precision.LossScaleOptimizer` documention if you want to customize the loss scaling behavior." + "If you want, it is possible choose an explicit loss scale or otherwise customize the loss scaling behavior, but it is highly recommended to keep the default loss scaling behavior, as it has been found to work well on all known models. See the `tf.keras.mixed_precision.LossScaleOptimizer` documentation if you want to customize the loss scaling behavior." ] }, { @@ -806,20 +810,21 @@ "source": [ "## Summary\n", "\n", - "- You should use mixed precision if you use TPUs or NVIDIA GPUs with at least compute capability 7.0, as it will improve performance by up to 3x.\n", + "- You should use mixed precision if you use TPUs, NVIDIA GPUs with at least compute capability 7.0, or Intel CPUs with support for AMX instructions, as it will improve performance by up to 3x.\n", "- You can use mixed precision with the following lines:\n", "\n", " ```python\n", - " # On TPUs, use 'mixed_bfloat16' instead\n", + " # On TPUs and CPUs, use 'mixed_bfloat16' instead\n", " mixed_precision.set_global_policy('mixed_float16')\n", " ```\n", "\n", "* If your model ends in softmax, make sure it is float32. And regardless of what your model ends in, make sure the output is float32.\n", "* If you use a custom training loop with `mixed_float16`, in addition to the above lines, you need to wrap your optimizer with a `tf.keras.mixed_precision.LossScaleOptimizer`. Then call `optimizer.get_scaled_loss` to scale the loss, and `optimizer.get_unscaled_gradients` to unscale the gradients.\n", + "* If you use a custom training loop with `mixed_bfloat16`, setting the global_policy mentioned above is sufficient.\n", "* Double the training batch size if it does not reduce evaluation accuracy\n", "* On GPUs, ensure most tensor dimensions are a multiple of $8$ to maximize performance\n", "\n", - "For more examples of mixed precision using the `tf.keras.mixed_precision` API, check the [official models repository](https://github.com/tensorflow/models/tree/master/official). Most official models, such as [ResNet](https://github.com/tensorflow/models/tree/master/official/vision/image_classification) and [Transformer](https://github.com/tensorflow/models/blob/master/official/nlp/transformer), will run using mixed precision by passing `--dtype=fp16`.\n" + "For an example of mixed precision using the `tf.keras.mixed_precision` API, check [functions and classes related to training performance](https://github.com/tensorflow/models/blob/master/official/modeling/performance.py). 
Check out the official models, such as [Transformer](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/transformer_encoder_block.py), for details.\n" ] } ], diff --git a/site/en/guide/profiler.md b/site/en/guide/profiler.md index 1cd19c109fe..dee8a5a84af 100644 --- a/site/en/guide/profiler.md +++ b/site/en/guide/profiler.md @@ -55,7 +55,7 @@ found. When you run profiling with CUDA® Toolkit in a Docker environment or on Linux, you may encounter issues related to insufficient CUPTI privileges (`CUPTI_ERROR_INSUFFICIENT_PRIVILEGES`). Go to the -[NVIDIA Developer Docs](https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters){:.external} +[NVIDIA Developer Docs](https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters) to learn more about how you can resolve these issues on Linux. To resolve CUPTI privilege issues in a Docker environment, run @@ -694,7 +694,7 @@ first few batches to avoid inaccuracies due to initialization overhead. An example for profiling multiple workers: ```python - # E.g. your worker IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you + # E.g., your worker IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you # would like to profile for a duration of 2 seconds. tf.profiler.experimental.client.trace( 'grpc://10.0.0.2:8466,grpc://10.0.0.3:8466,grpc://10.0.0.4:8466', @@ -845,7 +845,7 @@ more efficient by casting to different data types after applying spatial transformations, such as flipping, cropping, rotating, etc. Note: Some ops like `tf.image.resize` transparently change the `dtype` to -`fp32`. Make sure you normalize your data to lie between `0` and `1` if its not +`fp32`. Make sure you normalize your data to lie between `0` and `1` if it's not done automatically. Skipping this step could lead to `NaN` errors if you have enabled [AMP](https://developer.nvidia.com/automatic-mixed-precision). diff --git a/site/en/guide/ragged_tensor.ipynb b/site/en/guide/ragged_tensor.ipynb index 4bc0d679499..ba0be2928ce 100644 --- a/site/en/guide/ragged_tensor.ipynb +++ b/site/en/guide/ragged_tensor.ipynb @@ -81,6 +81,7 @@ }, "outputs": [], "source": [ + "!pip install --pre -U tensorflow\n", "import math\n", "import tensorflow as tf" ] @@ -109,7 +110,7 @@ "source": [ "### What you can do with a ragged tensor\n", "\n", - "Ragged tensors are supported by more than a hundred TensorFlow operations, including math operations (such as `tf.add` and `tf.reduce_mean`), array operations (such as `tf.concat` and `tf.tile`), string manipulation ops (such as `tf.substr`), control flow operations (such as `tf.while_loop` and `tf.map_fn`), and many others:" + "Ragged tensors are supported by more than a hundred TensorFlow operations, including math operations (such as `tf.add` and `tf.reduce_mean`), array operations (such as `tf.concat` and `tf.tile`), string manipulation ops (such as `tf.strings.substr`), control flow operations (such as `tf.while_loop` and `tf.map_fn`), and many others:" ] }, { @@ -673,14 +674,14 @@ "source": [ "### Keras\n", "\n", - "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. Ragged tensors may be passed as inputs to a Keras model by setting `ragged=True` on `tf.keras.Input` or `tf.keras.layers.InputLayer`. Ragged tensors may also be passed between Keras layers, and returned by Keras models. 
The following example shows a toy LSTM model that is trained using ragged tensors." + "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. It doesn't have ragged support. But it does support masked tensors. So the easiest way to use a ragged tensor in a Keras model is to convert the ragged tensor to a dense tensor, using `.to_tensor()` and then using Keras's builtin masking:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "pHls7hQVJlk5" + "id": "ucYf2sSzTvQo" }, "outputs": [], "source": [ @@ -690,26 +691,77 @@ " 'She turned me into a newt.',\n", " 'A newt?',\n", " 'Well, I got better.'])\n", - "is_question = tf.constant([True, False, True, False])\n", - "\n", + "is_question = tf.constant([True, False, True, False])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MGYKmizJTw8B" + }, + "outputs": [], + "source": [ "# Preprocess the input strings.\n", "hash_buckets = 1000\n", "words = tf.strings.split(sentences, ' ')\n", "hashed_words = tf.strings.to_hash_bucket_fast(words, hash_buckets)\n", - "\n", + "hashed_words.to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FTujwOlUT8J" + }, + "outputs": [], + "source": [ + "hashed_words.to_tensor()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vzWudaESUBOZ" + }, + "outputs": [], + "source": [ + "tf.keras.Input?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pHls7hQVJlk5" + }, + "outputs": [], + "source": [ "# Build the Keras model.\n", "keras_model = tf.keras.Sequential([\n", - " tf.keras.layers.Input(shape=[None], dtype=tf.int64, ragged=True),\n", - " tf.keras.layers.Embedding(hash_buckets, 16),\n", - " tf.keras.layers.LSTM(32, use_bias=False),\n", + " tf.keras.layers.Embedding(hash_buckets, 16, mask_zero=True),\n", + " tf.keras.layers.LSTM(32, return_sequences=True, use_bias=False),\n", + " tf.keras.layers.GlobalAveragePooling1D(),\n", " tf.keras.layers.Dense(32),\n", " tf.keras.layers.Activation(tf.nn.relu),\n", " tf.keras.layers.Dense(1)\n", "])\n", "\n", "keras_model.compile(loss='binary_crossentropy', optimizer='rmsprop')\n", - "keras_model.fit(hashed_words, is_question, epochs=5)\n", - "print(keras_model.predict(hashed_words))" + "keras_model.fit(hashed_words.to_tensor(), is_question, epochs=5)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1IAjjmdTU9OU" + }, + "outputs": [], + "source": [ + "print(keras_model.predict(hashed_words.to_tensor()))" ] }, { @@ -798,7 +850,7 @@ "source": [ "### Datasets\n", "\n", - "[tf.data](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components. " + "[tf.data](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components." 
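As a small, hedged illustration of that data structure (the sentences below are made up, not taken from the guide), a ragged tensor can be sliced directly into a `tf.data.Dataset`:

```python
# Minimal sketch: build a tf.data.Dataset from a ragged tensor.
import tensorflow as tf

words = tf.ragged.constant([["Hi"], ["Who", "are", "you?"], ["Well,", "bye."]])
dataset = tf.data.Dataset.from_tensor_slices(words)
for row in dataset:
  print(row)  # One variable-length row per dataset element.
```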
] }, { @@ -1077,9 +1129,11 @@ "import tempfile\n", "\n", "keras_module_path = tempfile.mkdtemp()\n", - "tf.saved_model.save(keras_model, keras_module_path)\n", - "imported_model = tf.saved_model.load(keras_module_path)\n", - "imported_model(hashed_words)" + "keras_model.save(keras_module_path+\"/my_model.keras\")\n", + "\n", + "imported_model = tf.keras.models.load_model(keras_module_path+\"/my_model.keras\")\n", + "\n", + "imported_model(hashed_words.to_tensor())" ] }, { @@ -1439,7 +1493,7 @@ "\n", "1. Use `tf.RaggedTensor.to_list` to convert the ragged tensor to a nested Python list.\n", "2. Use `tf.RaggedTensor.numpy` to convert the ragged tensor to a NumPy array whose values are nested NumPy arrays.\n", - "3. Decompose the ragged tensor into its components, using the `tf.RaggedTensor.values` and `tf.RaggedTensor.row_splits` properties, or row-paritioning methods such as `tf.RaggedTensor.row_lengths` and `tf.RaggedTensor.value_rowids`.\n", + "3. Decompose the ragged tensor into its components, using the `tf.RaggedTensor.values` and `tf.RaggedTensor.row_splits` properties, or row-partitioning methods such as `tf.RaggedTensor.row_lengths` and `tf.RaggedTensor.value_rowids`.\n", "4. Use Python indexing to select values from the ragged tensor.\n" ] }, @@ -1459,13 +1513,267 @@ "print(\"Indexed value:\", rt[1].numpy())" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "J87jMZa0M_YW" + }, + "source": [ + "## Ragged Shapes\n", + "\n", + "The shape of a tensor specifies the size of each axis. For example, the shape of `[[1, 2], [3, 4], [5, 6]]` is `[3, 2]`, since there are 3 rows and 2 columns. TensorFlow has two separate but related ways to describe shapes:\n", + "\n", + "* ***static shape***: Information about axis sizes that is known statically (e.g., while tracing a `tf.function`). May be partially specified.\n", + "\n", + "* ***dynamic shape***: Runtime information about the axis sizes." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IOETE_OLPLZo" + }, + "source": [ + "### Static shape\n", + "\n", + "A Tensor's static shape contains information about its axis sizes that is known at graph-construction time. For both `tf.Tensor` and `tf.RaggedTensor`, it is available using the `.shape` property, and is encoded using `tf.TensorShape`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "btGDjT4uNgQy" + }, + "outputs": [], + "source": [ + "x = tf.constant([[1, 2], [3, 4], [5, 6]])\n", + "x.shape # shape of a tf.tensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "__OgvmrGPEjq" + }, + "outputs": [], + "source": [ + "rt = tf.ragged.constant([[1], [2, 3], [], [4]])\n", + "rt.shape # shape of a tf.RaggedTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9EWnQd3qPWaw" + }, + "source": [ + "The static shape of a ragged dimension is always `None` (i.e., unspecified). However, the inverse is not true -- if a `TensorShape` dimension is `None`, then that could indicate that the dimension is ragged, *or* it could indicate that the dimension is uniform but that its size is not statically known." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "75E9YXYMNfne" + }, + "source": [ + "### Dynamic shape\n", + "\n", + "A tensor's dynamic shape contains information about its axis sizes that is known when the graph is run. It is constructed using the `tf.shape` operation. 
For `tf.Tensor`, `tf.shape` returns the shape as a 1D integer `Tensor`, where `tf.shape(x)[i]` is the size of axis `i`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kWJ7Cn1EQTD_" + }, + "outputs": [], + "source": [ + "x = tf.constant([['a', 'b'], ['c', 'd'], ['e', 'f']])\n", + "tf.shape(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BeZEfxwmRcSv" + }, + "source": [ + "However, a 1D `Tensor` is not expressive enough to describe the shape of a `tf.RaggedTensor`. Instead, the dynamic shape for ragged tensors is encoded using a dedicated type, `tf.experimental.DynamicRaggedShape`. In the following example, the `DynamicRaggedShape` returned by `tf.shape(rt)` indicates that the ragged tensor has 4 rows, with lengths 1, 3, 0, and 2:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nZc2wqgQQUFU" + }, + "outputs": [], + "source": [ + "rt = tf.ragged.constant([[1], [2, 3, 4], [], [5, 6]])\n", + "rt_shape = tf.shape(rt)\n", + "print(rt_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EphU60YvTf98" + }, + "source": [ + "#### Dynamic shape: operations\n", + "\n", + "`DynamicRaggedShape`s can be used with most TensorFlow ops that expect shapes, including `tf.reshape`, `tf.zeros`, `tf.ones`, `tf.fill`, `tf.broadcast_dynamic_shape`, and `tf.broadcast_to`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pclAODLXT6Gr" + }, + "outputs": [], + "source": [ + "print(f\"tf.reshape(x, rt_shape) = {tf.reshape(x, rt_shape)}\")\n", + "print(f\"tf.zeros(rt_shape) = {tf.zeros(rt_shape)}\")\n", + "print(f\"tf.ones(rt_shape) = {tf.ones(rt_shape)}\")\n", + "print(f\"tf.fill(rt_shape, 'x') = {tf.fill(rt_shape, 'x')}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rNP_3_btRAHj" + }, + "source": [ + "#### Dynamic shape: indexing and slicing\n", + "\n", + "`DynamicRaggedShape` can also be indexed to get the sizes of uniform dimensions. For example, we can find the number of rows in a ragged tensor using `tf.shape(rt)[0]` (just as we would for a non-ragged tensor):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MzQvPhsxS6HN" + }, + "outputs": [], + "source": [ + "rt_shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvr2iT6zS_e8" + }, + "source": [ + "However, it is an error to use indexing to try to retrieve the size of a ragged dimension, since it doesn't have a single size. (Since `RaggedTensor` keeps track of which axes are ragged, this error is only thrown during eager execution or when tracing a `tf.function`; it will never be thrown when executing a concrete function.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HgGMk0LeTGik" + }, + "outputs": [], + "source": [ + "try:\n", + " rt_shape[1]\n", + "except ValueError as e:\n", + " print(\"Got expected ValueError:\", e)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5QUsdawGU0SM" + }, + "source": [ + "`DynamicRaggedShape`s can also be sliced, as long as the slice either begins with axis `0`, or contains only dense dimensions."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "APT72EaBU70t" + }, + "outputs": [], + "source": [ + "rt_shape[:1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a-Wl9IrQXcdY" + }, + "source": [ + "#### Dynamic shape: encoding\n", + "\n", + "`DynamicRaggedShape` is encoded using two fields:\n", + "\n", + "* `inner_shape`: An integer vector giving the shape of a dense `tf.Tensor`.\n", + "* `row_partitions`: A list of `tf.experimental.RowPartition` objects, describing how the outermost dimension of that inner shape should be partitioned to add ragged axes.\n", + "\n", + "For more information about row partitions, see the \"RaggedTensor encoding\" section below, and the API docs for `tf.experimental.RowPartition`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jfeY9tTcV_zL" + }, + "source": [ + "#### Dynamic shape: construction\n", + "\n", + "`DynamicRaggedShape` is most often constructed by applying `tf.shape` to a `RaggedTensor`, but it can also be constructed directly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NSRgD667WwIZ" + }, + "outputs": [], + "source": [ + "tf.experimental.DynamicRaggedShape(\n", + " row_partitions=[tf.experimental.RowPartition.from_row_lengths([5, 3, 2])],\n", + " inner_shape=[10, 8])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EjzVjs9MXIIA" + }, + "source": [ + "If the lengths of all rows are known statically, `DynamicRaggedShape.from_lengths` can also be used to construct a dynamic ragged shape. (This is mostly useful for testing and demonstration code, since it's rare for the lengths of ragged dimensions to be known statically).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gMxCzADUYIjY" + }, + "outputs": [], + "source": [ + "tf.experimental.DynamicRaggedShape.from_lengths([4, (2, 1, 0, 8), 12])" + ] + }, { "cell_type": "markdown", "metadata": { "id": "EdljbNPq-PWS" }, "source": [ - "## Broadcasting\n", + "### Broadcasting\n", "\n", "Broadcasting is the process of making tensors with different shapes have compatible shapes for elementwise operations. For more background on broadcasting, refer to:\n", "\n", @@ -1491,7 +1799,7 @@ "id": "-S2hOUWx-PWU" }, "source": [ - "### Broadcasting examples" + "#### Broadcasting examples" ] }, { @@ -1870,7 +2178,6 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "ragged_tensor.ipynb", "toc_visible": true }, diff --git a/site/en/guide/random_numbers.ipynb b/site/en/guide/random_numbers.ipynb index 37c83ae76a0..f8b824ad906 100644 --- a/site/en/guide/random_numbers.ipynb +++ b/site/en/guide/random_numbers.ipynb @@ -166,7 +166,7 @@ "source": [ "See the *Algorithms* section below for more information about it.\n", "\n", - "Another way to create a generator is with `Generator.from_non_deterministic_state`. A generator created this way will start from a non-deterministic state, depending on e.g. time and OS." + "Another way to create a generator is with `Generator.from_non_deterministic_state`. A generator created this way will start from a non-deterministic state, depending on e.g., time and OS." ] }, { @@ -268,7 +268,7 @@ "source": [ "Note: In theory, you can use constructors such as `from_seed` instead of `split` here to obtain a new generator, but by doing so you lose the guarantee that the new generator is independent of the global generator. 
You will also run the risk that you may accidentally create two generators with the same seed or with seeds that lead to overlapping random-number streams.\n", "\n", - "You can do splitting recursively, calling `split` on splitted generators. There are no limits (barring integer overflow) on the depth of recursions." + "You can do splitting recursively, calling `split` on split generators. There are no limits (barring integer overflow) on the depth of recursions." ] }, { @@ -325,7 +325,7 @@ "source": [ "#### Creating generators inside `tf.function` \n", "\n", - "Creation of generators inside a `tf.function` can only happend during the first run of the function. " + "Creation of generators inside a `tf.function` can only happened during the first run of the function. " ] }, { diff --git a/site/en/guide/saved_model.ipynb b/site/en/guide/saved_model.ipynb index 355a8240977..2601e504669 100644 --- a/site/en/guide/saved_model.ipynb +++ b/site/en/guide/saved_model.ipynb @@ -74,9 +74,10 @@ "- Low-level `tf.saved_model` API. This document describes how to use this API in detail.\n", " - Save: `tf.saved_model.save(model, path_to_dir)`\n", " - Load: `model = tf.saved_model.load(path_to_dir)`\n", - "- High-level `tf.keras.Model` API. Refer to [the keras save and serialize guide](keras/save_and_serialize.ipynb).\n", + "- High-level `tf.keras.Model` API. Refer to [the keras save and serialize guide](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", "- If you just want to save/load weights during training, refer to [the checkpoints guide](./checkpoint.ipynb).\n", - "\n" + "\n", + "Caution: TensorFlow models are code and it is important to be careful with untrusted code. Learn more in [Using TensorFlow securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).\n" ] }, { @@ -85,8 +86,24 @@ "id": "9SuIC7FiI9g8" }, "source": [ - "## Creating a SavedModel from Keras\n", - "\n", + "## Creating a SavedModel from Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AtSmftAvhJvE" + }, + "source": [ + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eLSOptpYhJvE" + }, + "source": [ "For a quick introduction, this section exports a pre-trained Keras model and serves image classification requests with it. The rest of the guide will fill in details and discuss other ways to create SavedModels." ] }, @@ -132,10 +149,10 @@ "file = tf.keras.utils.get_file(\n", " \"grace_hopper.jpg\",\n", " \"https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg\")\n", - "img = tf.keras.preprocessing.image.load_img(file, target_size=[224, 224])\n", + "img = tf.keras.utils.load_img(file, target_size=[224, 224])\n", "plt.imshow(img)\n", "plt.axis('off')\n", - "x = tf.keras.preprocessing.image.img_to_array(img)\n", + "x = tf.keras.utils.img_to_array(img)\n", "x = tf.keras.applications.mobilenet.preprocess_input(\n", " x[tf.newaxis,...])" ] @@ -353,7 +370,9 @@ "source": [ "The `assets` directory contains files used by the TensorFlow graph, for example text files used to initialize vocabulary tables. 
It is unused in this example.\n", "\n", - "SavedModels may have an `assets.extra` directory for any files not used by the TensorFlow graph, for example information for consumers about what to do with the SavedModel. TensorFlow itself does not use this directory." + "SavedModels may have an `assets.extra` directory for any files not used by the TensorFlow graph, for example information for consumers about what to do with the SavedModel. TensorFlow itself does not use this directory.\n", + "\n", + "The `fingerprint.pb` file contains the [fingerprint](https://en.wikipedia.org/wiki/Fingerprint_(computing)) of the SavedModel, which is composed of several 64-bit hashes that uniquely identify the contents of the SavedModel. The fingerprinting API is currently experimental, but `tf.saved_model.experimental.read_fingerprint` can be used to read the SavedModel fingerprint into a `tf.saved_model.experimental.Fingerprint` object." ] }, { @@ -407,7 +426,7 @@ "\n", "Briefly, `tf.function` works by tracing the Python code to generate a ConcreteFunction (a callable wrapper around `tf.Graph`). When saving a `tf.function`, you're really saving the `tf.function`'s cache of ConcreteFunctions.\n", "\n", - "To learn more about the relationship between `tf.function` and ConcreteFunctions, see the [tf.function guide](../../guide/function)." + "To learn more about the relationship between `tf.function` and ConcreteFunctions, refer to the [tf.function guide](function.ipynb)." ] }, { @@ -492,7 +511,7 @@ }, "outputs": [], "source": [ - "optimizer = tf.optimizers.SGD(0.05)\n", + "optimizer = tf.keras.optimizers.SGD(0.05)\n", "\n", "def train_step():\n", " with tf.GradientTape() as tape:\n", @@ -620,7 +639,7 @@ "outputs": [], "source": [ "imported_with_signatures = tf.saved_model.load(module_with_signature_path)\n", - "list(imported_with_signatures.signatures.keys())\n" + "list(imported_with_signatures.signatures.keys()) # [\"serving_default\"]" ] }, { @@ -655,8 +674,12 @@ }, "outputs": [], "source": [ - "imported_with_multiple_signatures = tf.saved_model.load(module_multiple_signatures_path)\n", - "list(imported_with_multiple_signatures.signatures.keys())" + "imported_with_multiple_signatures = tf.saved_model.load(\n", + " module_multiple_signatures_path\n", + ")\n", + "list(\n", + " imported_with_multiple_signatures.signatures.keys()\n", + ") # [\"serving_default\", \"array_input\"]" ] }, { @@ -681,7 +704,7 @@ " super(CustomModuleWithOutputName, self).__init__()\n", " self.v = tf.Variable(1.)\n", "\n", - " @tf.function(input_signature=[tf.TensorSpec([], tf.float32)])\n", + " @tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])\n", " def __call__(self, x):\n", " return {'custom_output_name': x * self.v}\n", "\n", @@ -701,7 +724,41 @@ "outputs": [], "source": [ "imported_with_output_name = tf.saved_model.load(module_output_path)\n", - "imported_with_output_name.signatures['serving_default'].structured_outputs" + "imported_with_output_name.signatures[\n", + " 'serving_default'\n", + "].structured_outputs # {'custom_output_name': TensorSpec(shape=, dtype=tf.float32, name='custom_output_name')}" ] }, { + "cell_type": "markdown", + "metadata": { + "id": "Q4bCK55x1IBW" + }, + "source": [ + "## Proto-splitting\n", + "\n", + "Note: This feature will be part of the TensorFlow 2.15 release. It is currently available in the nightly build which you can install with `pip install tf-nightly`.\n", + "\n", + "Due to limits of the protobuf implementation, proto sizes cannot exceed 2GB.
This can lead to the following errors when attempting to save very large models:\n", + "\n", + "```\n", + "ValueError: Message tensorflow.SavedModel exceeds maximum protobuf size of 2GB: ...\n", + "```\n", + "\n", + "```\n", + "google.protobuf.message.DecodeError: Error parsing message as the message exceeded the protobuf limit with type 'tensorflow.GraphDef'\n", + "```\n", + "\n", + "If you wish to save models that exceed the 2GB limit, then you'll need to save using the new proto-splitting option:\n", + "\n", + "```python\n", + "tf.saved_model.save(\n", + " ...,\n", + " options=tf.saved_model.SaveOptions(experimental_image_format=True)\n", + ")\n", + "```\n", + "\n", + "More information can be found in the [Proto Splitter / Merger Library guide](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/proto_splitter/g3doc/in-depth-guide.md)." ] }, { @@ -759,7 +816,7 @@ "additional command to build `saved_model_cli`:\n", "\n", "```\n", - "$ bazel build tensorflow/python/tools:saved_model_cli\n", + "$ bazel build //tensorflow/python/tools:saved_model_cli\n", "```\n", "\n", "### Overview of commands\n", @@ -975,7 +1032,6 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "saved_model.ipynb", "provenance": [], "toc_visible": true diff --git a/site/en/guide/sparse_tensor.ipynb b/site/en/guide/sparse_tensor.ipynb index 2395c6e6365..3d4daca7fad 100644 --- a/site/en/guide/sparse_tensor.ipynb +++ b/site/en/guide/sparse_tensor.ipynb @@ -79,7 +79,7 @@ "source": [ "## Sparse tensors in TensorFlow\n", "\n", - "TensorFlow represents sparse tensors through the `tf.SparseTensor` object. Currently, sparse tensors in TensorFlow are encoded using the coordinate list (COO) format. This encoding format is optimized for hyper-sparse matrices such as embeddings.\n", + "TensorFlow represents sparse tensors through the `tf.sparse.SparseTensor` object. Currently, sparse tensors in TensorFlow are encoded using the coordinate list (COO) format. This encoding format is optimized for hyper-sparse matrices such as embeddings.\n", "\n", "The COO encoding for sparse tensors is comprised of:\n", "\n", @@ -87,9 +87,9 @@ " * `indices`: A 2D tensor with shape `[N, rank]`, containing the indices of the nonzero values.\n", " * `dense_shape`: A 1D tensor with shape `[rank]`, specifying the shape of the tensor.\n", "\n", - "A ***nonzero*** value in the context of a `tf.SparseTensor` is a value that's not explicitly encoded. It is possible to explicitly include zero values in the `values` of a COO sparse matrix, but these \"explicit zeros\" are generally not included when referring to nonzero values in a sparse tensor.\n", + "A ***nonzero*** value in the context of a `tf.sparse.SparseTensor` is a value that's not explicitly encoded. It is possible to explicitly include zero values in the `values` of a COO sparse matrix, but these \"explicit zeros\" are generally not included when referring to nonzero values in a sparse tensor.\n", "\n", - "Note: `tf.SparseTensor` does not require that indices/values be in any particular order, but several ops assume that they're in row-major order. Use `tf.sparse.reorder` to create a copy of the sparse tensor that is sorted in the canonical row-major order. " + "Note: `tf.sparse.SparseTensor` does not require that indices/values be in any particular order, but several ops assume that they're in row-major order. Use `tf.sparse.reorder` to create a copy of the sparse tensor that is sorted in the canonical row-major order. 
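To illustrate the note above, here is a minimal sketch (with made-up indices) of `tf.sparse.reorder` producing the canonical row-major ordering:

```python
# Minimal sketch: indices are deliberately out of row-major order.
import tensorflow as tf

st = tf.sparse.SparseTensor(indices=[[2, 4], [0, 3]],
                            values=[20, 10],
                            dense_shape=[3, 10])
st_ordered = tf.sparse.reorder(st)
print(st_ordered.indices.numpy())  # [[0 3] [2 4]], values reordered to match.
```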
" ] }, { @@ -98,7 +98,7 @@ "id": "6Aq7ruwlyz79" }, "source": [ - "## Creating a `tf.SparseTensor`\n", + "## Creating a `tf.sparse.SparseTensor`\n", "\n", "Construct sparse tensors by directly specifying their `values`, `indices`, and `dense_shape`." ] @@ -122,7 +122,7 @@ }, "outputs": [], "source": [ - "st1 = tf.SparseTensor(indices=[[0, 3], [2, 4]],\n", + "st1 = tf.sparse.SparseTensor(indices=[[0, 3], [2, 4]],\n", " values=[10, 20],\n", " dense_shape=[3, 10])" ] @@ -252,11 +252,11 @@ }, "outputs": [], "source": [ - "st_a = tf.SparseTensor(indices=[[0, 2], [3, 4]],\n", + "st_a = tf.sparse.SparseTensor(indices=[[0, 2], [3, 4]],\n", " values=[31, 2], \n", " dense_shape=[4, 10])\n", "\n", - "st_b = tf.SparseTensor(indices=[[0, 2], [7, 0]],\n", + "st_b = tf.sparse.SparseTensor(indices=[[0, 2], [3, 0]],\n", " values=[56, 38],\n", " dense_shape=[4, 10])\n", "\n", @@ -282,7 +282,7 @@ }, "outputs": [], "source": [ - "st_c = tf.SparseTensor(indices=([0, 1], [1, 0], [1, 1]),\n", + "st_c = tf.sparse.SparseTensor(indices=([0, 1], [1, 0], [1, 1]),\n", " values=[13, 15, 17],\n", " dense_shape=(2,2))\n", "\n", @@ -309,14 +309,14 @@ }, "outputs": [], "source": [ - "sparse_pattern_A = tf.SparseTensor(indices = [[2,4], [3,3], [3,4], [4,3], [4,4], [5,4]],\n", + "sparse_pattern_A = tf.sparse.SparseTensor(indices = [[2,4], [3,3], [3,4], [4,3], [4,4], [5,4]],\n", " values = [1,1,1,1,1,1],\n", " dense_shape = [8,5])\n", - "sparse_pattern_B = tf.SparseTensor(indices = [[0,2], [1,1], [1,3], [2,0], [2,4], [2,5], [3,5], \n", + "sparse_pattern_B = tf.sparse.SparseTensor(indices = [[0,2], [1,1], [1,3], [2,0], [2,4], [2,5], [3,5], \n", " [4,5], [5,0], [5,4], [5,5], [6,1], [6,3], [7,2]],\n", " values = [1,1,1,1,1,1,1,1,1,1,1,1,1,1],\n", " dense_shape = [8,6])\n", - "sparse_pattern_C = tf.SparseTensor(indices = [[3,0], [4,0]],\n", + "sparse_pattern_C = tf.sparse.SparseTensor(indices = [[3,0], [4,0]],\n", " values = [1,1],\n", " dense_shape = [8,6])\n", "\n", @@ -381,7 +381,7 @@ }, "outputs": [], "source": [ - "st2_plus_5 = tf.SparseTensor(\n", + "st2_plus_5 = tf.sparse.SparseTensor(\n", " st2.indices,\n", " st2.values + 5,\n", " st2.dense_shape)\n", @@ -394,7 +394,7 @@ "id": "GFhO2ZZ53ga1" }, "source": [ - "## Using `tf.SparseTensor` with other TensorFlow APIs\n", + "## Using `tf.sparse.SparseTensor` with other TensorFlow APIs\n", "\n", "Sparse tensors work transparently with these TensorFlow APIs:\n", "\n", @@ -449,7 +449,7 @@ "y = tf.keras.layers.Dense(4)(x)\n", "model = tf.keras.Model(x, y)\n", "\n", - "sparse_data = tf.SparseTensor(\n", + "sparse_data = tf.sparse.SparseTensor(\n", " indices = [(0,0),(0,1),(0,2),\n", " (4,3),(5,0),(5,1)],\n", " values = [1,1,1,1,1,1],\n", @@ -569,9 +569,9 @@ "\n", "`tf.train.Example` is a standard protobuf encoding for TensorFlow data. When using sparse tensors with `tf.train.Example`, you can:\n", "\n", - "* Read variable-length data into a `tf.SparseTensor` using `tf.io.VarLenFeature`. However, you should consider using `tf.io.RaggedFeature` instead.\n", + "* Read variable-length data into a `tf.sparse.SparseTensor` using `tf.io.VarLenFeature`. However, you should consider using `tf.io.RaggedFeature` instead.\n", "\n", - "* Read arbitrary sparse data into a `tf.SparseTensor` using `tf.io.SparseFeature`, which uses three separate feature keys to store the `indices`, `values`, and `dense_shape`." + "* Read arbitrary sparse data into a `tf.sparse.SparseTensor` using `tf.io.SparseFeature`, which uses three separate feature keys to store the `indices`, `values`, and `dense_shape`." 
] }, { @@ -597,7 +597,7 @@ "def f(x,y):\n", " return tf.sparse.sparse_dense_matmul(x,y)\n", "\n", - "a = tf.SparseTensor(indices=[[0, 3], [2, 4]],\n", + "a = tf.sparse.SparseTensor(indices=[[0, 3], [2, 4]],\n", " values=[15, 25],\n", " dense_shape=[3, 10])\n", "\n", @@ -616,11 +616,11 @@ "source": [ "## Distinguishing missing values from zero values\n", "\n", - "Most ops on `tf.SparseTensor`s treat missing values and explicit zero values identically. This is by design — a `tf.SparseTensor` is supposed to act just like a dense tensor.\n", + "Most ops on `tf.sparse.SparseTensor`s treat missing values and explicit zero values identically. This is by design — a `tf.sparse.SparseTensor` is supposed to act just like a dense tensor.\n", "\n", "However, there are a few cases where it can be useful to distinguish zero values from missing values. In particular, this allows for one way to encode missing/unknown data in your training data. For example, consider a use case where you have a tensor of scores (that can have any floating point value from -Inf to +Inf), with some missing scores. You can encode this tensor using a sparse tensor where the explicit zeros are known zero scores but the implicit zero values actually represent missing data and not zero. \n", "\n", - "Note: This is generally not the intended usage of `tf.SparseTensor`s; and you might want to also consier other techniques for encoding this such as for example using a separate mask tensor that identifies the locations of known/unknown values. However, exercise caution while using this approach, since most sparse operations will treat explicit and implicit zero values identically." + "Note: This is generally not the intended usage of `tf.sparse.SparseTensor`s; and you might want to also consider other techniques for encoding this such as for example using a separate mask tensor that identifies the locations of known/unknown values. However, exercise caution while using this approach, since most sparse operations will treat explicit and implicit zero values identically." ] }, { @@ -680,8 +680,7 @@ "metadata": { "colab": { "collapsed_sections": [], - "name": "sparse_tensor_guide.ipynb", - "provenance": [], + "name": "sparse_tensor.ipynb", "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tensor.ipynb b/site/en/guide/tensor.ipynb index 45dbd37fb20..2eb261aad75 100644 --- a/site/en/guide/tensor.ipynb +++ b/site/en/guide/tensor.ipynb @@ -80,7 +80,7 @@ "id": "VQ3s2J8Vgowq" }, "source": [ - "Tensors are multi-dimensional arrays with a uniform type (called a `dtype`). You can see all supported `dtypes` at `tf.dtypes.DType`.\n", + "Tensors are multi-dimensional arrays with a uniform type (called a `dtype`). You can see all supported `dtypes` at `tf.dtypes`.\n", "\n", "If you're familiar with [NumPy](https://numpy.org/devdocs/user/quickstart.html), tensors are (kind of) like `np.arrays`.\n", "\n", @@ -95,7 +95,7 @@ "source": [ "## Basics\n", "\n", - "Let's create some basic tensors." + "First, create some basic tensors." ] }, { @@ -326,7 +326,7 @@ "a = tf.constant([[1, 2],\n", " [3, 4]])\n", "b = tf.constant([[1, 1],\n", - " [1, 1]]) # Could have also said `tf.ones([2,2])`\n", + " [1, 1]]) # Could have also said `tf.ones([2,2], dtype=tf.int32)`\n", "\n", "print(tf.add(a, b), \"\\n\")\n", "print(tf.multiply(a, b), \"\\n\")\n", @@ -352,7 +352,7 @@ "id": "S3_vIAl2JPVc" }, "source": [ - "Tensors are used in all kinds of operations (ops)." + "Tensors are used in all kinds of operations (or \"Ops\")." 
] }, { @@ -368,11 +368,53 @@ "# Find the largest value\n", "print(tf.reduce_max(c))\n", "# Find the index of the largest value\n", - "print(tf.argmax(c))\n", + "print(tf.math.argmax(c))\n", "# Compute the softmax\n", "print(tf.nn.softmax(c))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MNM-q7-MZLz" + }, + "source": [ + "Note: Typically, anywhere a TensorFlow function expects a `Tensor` as input, the function will also accept anything that can be converted to a `Tensor` using `tf.convert_to_tensor`. See below for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_wch0N8xNEt-" + }, + "outputs": [], + "source": [ + "tf.convert_to_tensor([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ngqIeWYeNJVI" + }, + "outputs": [], + "source": [ + "tf.reduce_max([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ThVMxqbVNOq3" + }, + "outputs": [], + "source": [ + "tf.reduce_max(np.array([1,2,3]))" + ] + }, { "cell_type": "markdown", "metadata": { @@ -393,7 +435,7 @@ "* **Shape**: The length (number of elements) of each of the axes of a tensor.\n", "* **Rank**: Number of tensor axes. A scalar has rank 0, a vector has rank 1, a matrix is rank 2.\n", "* **Axis** or **Dimension**: A particular dimension of a tensor.\n", - "* **Size**: The total number of items in the tensor, the product shape vector.\n" + "* **Size**: The total number of items in the tensor, the product of the shape vector's elements.\n" ] }, { @@ -461,6 +503,37 @@ "print(\"Total number of elements (3*2*4*5): \", tf.size(rank_4_tensor).numpy())" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ZGZp_JOOPOv" + }, + "source": [ + "But note that the `Tensor.ndim` and `Tensor.shape` attributes don't return `Tensor` objects. If you need a `Tensor` use the `tf.rank` or `tf.shape` function. This difference is subtle, but it can be important when building graphs (later)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ptq0-y6APCpD" + }, + "outputs": [], + "source": [ + "tf.rank(rank_4_tensor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HslrDOEBPICN" + }, + "outputs": [], + "source": [ + "tf.shape(rank_4_tensor)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -919,7 +992,7 @@ "Except for [tf.RaggedTensor](#ragged_tensors), such shapes will only occur in the context of TensorFlow's symbolic, graph-building APIs:\n", "\n", "* [tf.function](function.ipynb) \n", - "* The [keras functional API](keras/functional.ipynb).\n" + "* The [keras functional API](https://www.tensorflow.org/guide/keras/functional).\n" ] }, { @@ -962,7 +1035,7 @@ "source": [ "## Broadcasting\n", "\n", - "Broadcasting is a concept borrowed from the [equivalent feature in NumPy](https://numpy.org/doc/stable/user/basics.html). In short, under certain conditions, smaller tensors are \"stretched\" automatically to fit larger tensors when running combined operations on them.\n", + "Broadcasting is a concept borrowed from the [equivalent feature in NumPy](https://numpy.org/doc/stable/user/basics.broadcasting.html). In short, under certain conditions, smaller tensors are \"stretched\" automatically to fit larger tensors when running combined operations on them.\n", "\n", "The simplest and most common case is when you attempt to multiply or add a tensor to a scalar. 
In that case, the scalar is broadcast to be the same shape as the other argument. " ] @@ -1372,7 +1445,7 @@ "id": "st9OxrUxWSKY" }, "source": [ - "And `tf.string.to_number`:" + "And `tf.strings.to_number`:" ] }, { @@ -1491,7 +1564,6 @@ "Tce3stUlHN0L" ], "name": "tensor.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tensor_slicing.ipynb b/site/en/guide/tensor_slicing.ipynb index 9f58a206de6..c5cb2d71356 100644 --- a/site/en/guide/tensor_slicing.ipynb +++ b/site/en/guide/tensor_slicing.ipynb @@ -635,7 +635,6 @@ "colab": { "collapsed_sections": [], "name": "tensor_slicing.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tf_numpy.ipynb b/site/en/guide/tf_numpy.ipynb index 9b469241a3b..3083acb147d 100644 --- a/site/en/guide/tf_numpy.ipynb +++ b/site/en/guide/tf_numpy.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "TensorFlow implements a subset of the [NumPy API](https://numpy.org/doc/1.16), available as `tf.experimental.numpy`. This allows running NumPy code, accelerated by TensorFlow, while also allowing access to all of TensorFlow's APIs." + "TensorFlow implements a subset of the [NumPy API](https://numpy.org/doc/stable/index.html), available as `tf.experimental.numpy`. This allows running NumPy code, accelerated by TensorFlow, while also allowing access to all of TensorFlow's APIs." ] }, { @@ -142,7 +142,7 @@ "\n", "An instance of `tf.experimental.numpy.ndarray`, called **ND Array**, represents a multidimensional dense array of a given `dtype` placed on a certain device. It is an alias to `tf.Tensor`. Check out the ND array class for useful methods like `ndarray.T`, `ndarray.reshape`, `ndarray.ravel` and others.\n", "\n", - "First create an ND array object, and then invoke different methods. " + "First create an ND array object, and then invoke different methods." ] }, { @@ -170,11 +170,28 @@ { "cell_type": "markdown", "metadata": { - "id": "Mub8-dvJMUr4" + "id": "-BOY8CGRKEhE" }, "source": [ "### Type promotion\n", "\n", + "There are 4 options for type promotion in TensorFlow.\n", + "\n", + "- By default, TensorFlow raises errors instead of promoting types for mixed type operations.\n", + "- Running `tf.numpy.experimental_enable_numpy_behavior()` switches TensorFlow to use `NumPy` type promotion rules (described below).\n", + "- After TensorFlow 2.15, there are two new options (refer to [TF NumPy Type Promotion](tf_numpy_type_promotion.ipynb) for details):\n", + " - `tf.numpy.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")` uses Jax type promotion rules.\n", + " - `tf.numpy.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")` uses Jax type promotion rules, but disallows certain unsafe promotions." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SXskSHrX5J45" + }, + "source": [ + "#### NumPy Type Promotion\n", + "\n", "TensorFlow NumPy APIs have well-defined semantics for converting literals to ND array, as well as for performing type promotion on ND array inputs. Please see [`np.result_type`](https://numpy.org/doc/1.16/reference/generated/numpy.result_type.html) for more details." 
] }, @@ -200,7 +217,7 @@ " (tnp.int32, tnp.int64, tnp.float32, tnp.float64)]\n", "for i, v1 in enumerate(values):\n", " for v2 in values[i + 1:]:\n", - " print(\"%s + %s => %s\" % \n", + " print(\"%s + %s => %s\" %\n", " (v1.dtype.name, v2.dtype.name, (v1 + v2).dtype.name))" ] }, @@ -932,8 +949,8 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], "name": "tf_numpy.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tf_numpy_type_promotion.ipynb b/site/en/guide/tf_numpy_type_promotion.ipynb new file mode 100644 index 00000000000..f984310822a --- /dev/null +++ b/site/en/guide/tf_numpy_type_promotion.ipynb @@ -0,0 +1,1138 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ZjN_IJ8mhJ-4" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sY3Ffd83hK3b" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "03Pw58e6mTHI" + }, + "source": [ + "# TF-NumPy Type Promotion" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l9nPKvxK-_pM" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uma-W5v__DYh" + }, + "source": [ + "## Overview\n", + "\n", + "There are 4 options for type promotion in TensorFlow.\n", + "\n", + "- By default, TensorFlow raises errors instead of promoting types for mixed type operations.\n", + "- Running `tf.numpy.experimental_enable_numpy_behavior()` switches TensorFlow to use [NumPy type promotion rules](https://www.tensorflow.org/guide/tf_numpy#type_promotion).\n", + "- **This doc** describes two new options that will be available in TensorFlow 2.15 (or currently in `tf-nightly`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vMvEKDFOsau7" + }, + "outputs": [], + "source": [ + "!pip install -q tf_nightly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a6hOFBfPsd3y" + }, + "source": [ + " **Note**: `experimental_enable_numpy_behavior` changes the behavior of all of TensorFlow." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ob1HNwUmYR5b" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AJR558zjAZQu" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow.experimental.numpy as tnp\n", + "\n", + "print(\"Using TensorFlow version %s\" % tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M6tacoy0DU6e" + }, + "source": [ + "### Enabling the new type promotion\n", + "\n", + "In order to use the [JAX-like type promotion](https://jax.readthedocs.io/en/latest/type_promotion.html) in TF-Numpy, specify either `'all'` or `'safe'` as the dtype conversion mode when enabling NumPy behavior for TensorFlow.\n", + "\n", + "This new system (with `dtype_conversion_mode=\"all\"`) is associative, commutative, and makes it easy to control what width of float you end up with (it doesn't automatically convert to wider floats). It does introduce some risks of overflows and precision loss, but `dtype_conversion_mode=\"safe\"` forces you to handle those cases explicitly. The two modes are explained more in detail in the [next section](#two_modes)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TfCyofpFDQxm" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sEMXK8-ZWMun" + }, + "source": [ + "\n", + "\n", + "## Two Modes : ALL mode vs SAFE mode\n", + "\n", + "In the new type promotion system, we introduce two modes: `ALL` mode and `SAFE` mode. `SAFE` mode is used to mitigate the concerns of \"risky\" promotions that can result in precision loss or bit-widening." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-ULvTWj_KnHU" + }, + "source": [ + "### Dtypes\n", + "\n", + "We will be using the following abbreviations for brevity.\n", + "\n", + "* `b` means `tf.bool`\n", + "* `u8` means `tf.uint8`\n", + "* `i16` means `tf.int16`\n", + "* `i32` means `tf.int32`\n", + "* `bf16` means `tf.bfloat16`\n", + "* `f32` means `tf.float32`\n", + "* `f64` means `tf.float64`\n", + "* `i32*` means Python `int` or weakly-typed `i32`\n", + "* `f32*` means Python `float` or weakly-typed `f32`\n", + "* `c128*` means Python `complex` or weakly-typed `c128`\n", + "\n", + "The asterisk (*) denotes that the corresponding type is “weak” - such a dtype is temporarily inferred by the system, and could defer to other dtypes. 
This concept is explained more in detail [here](#weak_tensor)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hXZxLCkuzzq3" + }, + "source": [ + "### Example of precision losing operations\n", + "\n", + "In the following example, `i32` + `f32` is allowed in `ALL` mode but\n", + "not in `SAFE` mode due to the risk of precision loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y-yeIvstWStL" + }, + "outputs": [], + "source": [ + "# i32 + f32 returns a f32 result in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, dtype = tf.int32)\n", + "b = tf.constant(5.0, dtype = tf.float32)\n", + "a + b # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JNNmZow2WY3G" + }, + "outputs": [], + "source": [ + "# This promotion is not allowed in SAFE mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")\n", + "a = tf.constant(10, dtype = tf.int32)\n", + "b = tf.constant(5.0, dtype = tf.float32)\n", + "try:\n", + " a + b\n", + "except TypeError as e:\n", + " print(f'{type(e)}: {e}') # TypeError: explicitly specify the dtype or switch to ALL mode." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f0x4Qhff0AKS" + }, + "source": [ + "### Example of bit-widening operations\n", + "\n", + "In the following example, `i8` + `u32` is allowed in `ALL` mode but\n", + "not in `SAFE` mode due to bit-widening, which means using more bits than the number of bits in the inputs. Note that the new type promotion semantics only allows necessary bit-widening." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Etbv-WoWzUXf" + }, + "outputs": [], + "source": [ + "# i8 + u32 returns an i64 result in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, dtype = tf.int8)\n", + "b = tf.constant(5, dtype = tf.uint32)\n", + "a + b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yKRdvtvw0Lvt" + }, + "outputs": [], + "source": [ + "# This promotion is not allowed in SAFE mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")\n", + "a = tf.constant(10, dtype = tf.int8)\n", + "b = tf.constant(5, dtype = tf.uint32)\n", + "try:\n", + " a + b\n", + "except TypeError as e:\n", + " print(f'{type(e)}: {e}') # TypeError: explicitly specify the dtype or switch to ALL mode." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yh2BwqUzH3C3" + }, + "source": [ + "## A System Based on a Lattice" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HHUnfTPiYVN5" + }, + "source": [ + "### Type Promotion Lattice\n", + "\n", + "The new type promotion behavior is determined via the following type promotion lattice:\n", + "\n", + "![Type Promotion Lattice](https://tensorflow.org/guide/images/new_type_promotion/type_promotion_lattice.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QykluwRyDDle" + }, + "source": [ + "More specifically, promotion between any two types is determined by finding the first common child of the two nodes (including the nodes themselves).\n", + "\n", + "For example, in the diagram above, the first common child of `i8` and `i32` is `i32` because the two nodes intersect for the first time at `i32` when following the direction of the arrows.\n", + "\n", + "Similarly as another example, the result promotion type between `u64` and `f16` would be `f16`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nthziRHaDAUY" + }, + "source": [ + "\n", + "\n", + "### Type Promotion Table\n", + "\n", + "Following the lattice generates the binary promotion table below:\n", + "\n", + "**Note**: `SAFE` mode disallows the highlighted cells. `ALL` mode allows all cases.\n", + "\n", + "![Type Promotion Table](https://tensorflow.org/guide/images/new_type_promotion/type_promotion_table.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TPDt5QTkucSC" + }, + "source": [ + "## Advantages of The New Type Promotion\n", + "\n", + "We adopt a JAX-like lattice-based system for our new type promotion, which offers the following advantages:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NUS_b13nue1p" + }, + "source": [ + "\n", + "\n", + "#### Advantages of Lattice-Based System\n", + "\n", + "First, using a lattice-based system ensures three very important properties:\n", + "\n", + "* Existence: There is a unique result promotion type for any combinations of types.\n", + "* Commutativity: `a + b = b + a`\n", + "* Associativity: `a + (b + c) = (a + b) = c`\n", + "\n", + "These three properties are critical for constructing a type promotion semantics that is consistent and predictable." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sz88hRR6uhls" + }, + "source": [ + "#### Advantages of JAX-like Lattice System\n", + "\n", + "Another crucial advantage of the JAX-like lattice system is that outside unsigned ints, it avoids all wider-than-necessary promotions. This means you cannot get 64-bit results without 64-bit inputs. This is especially beneficial for working on accelerators as it avoids unnecessary 64-bit values, which was frequent in the old type promotion." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rlylb7ieOVbJ" + }, + "source": [ + "However, this comes with a trade-off: mixed float/integer promotion is very prone to precision loss. For instance, in the example below, `i64` + `f16` results in promoting `i64` to `f16`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "abqIkV02OXEF" + }, + "outputs": [], + "source": [ + "# The first input is promoted to f16 in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "tf.constant(1, tf.int64) + tf.constant(3.2, tf.float16) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mYnh1gZdObfI" + }, + "source": [ + "To migitage this concern, we introduced a `SAFE` mode that will disallow these \"risky\" promotions.\n", + "\n", + "**Note**: To learn more about the design considerations in constructing the lattice system, please refer to the [Design of Type Promotion Semantics for JAX](https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gAc7LFV0S2dP" + }, + "source": [ + "\n", + "\n", + "## WeakTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "olQ2gsFlS9BH" + }, + "source": [ + "### Overview\n", + "\n", + "*Weak tensors* are Tensors that are \"weakly typed\", similar to a [concept in JAX](https://jax.readthedocs.io/en/latest/type_promotion.html#weakly-typed-values-in-jax).\n", + "\n", + "`WeakTensor`'s dtype is temporarily inferred by the system, and could defer to other dtypes. This concept is introduced in the new type promotion to prevent unwanted type promotion within binary operations between TF values and values with no explicitly user-specified type, such as Python scalar literals." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MYmoFIqZTFtw" + }, + "source": [ + "For instance, in the example below, `tf.constant(1.2)` is considered \"weak\" because it doesn't have a specific dtype. Therefore, `tf.constant(1.2)` defers to the type of `tf.constant(3.1, tf.float16)`, resulting in a `f16` output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eSBv_mzyTE97" + }, + "outputs": [], + "source": [ + "tf.constant(1.2) + tf.constant(3.1, tf.float16) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxuqBIFuTm5Z" + }, + "source": [ + "### WeakTensor Construction\n", + "\n", + "WeakTensors are created if you create a tensor without specifying a dtype the result is a WeakTensor. You can check whether a Tensor is \"weak\" or not by checking the weak attribute at the end of the Tensor's string representation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7UmunnJ8True3" + }, + "source": [ + "**First Case**: When `tf.constant` is called with an input with no user-specified dtype." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fLEtMluNTsI5" + }, + "outputs": [], + "source": [ + "tf.constant(5) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZQX6MBWHTt__" + }, + "outputs": [], + "source": [ + "tf.constant([5.0, 10.0, 3]) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ftsKSC5BTweP" + }, + "outputs": [], + "source": [ + "# A normal Tensor is created when dtype arg is specified.\n", + "tf.constant(5, tf.int32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RqhoRy5iTyag" + }, + "source": [ + "**Second Case**: When an input with no user-specified dtype is passed into a [WeakTensor-supporting API](#weak_tensor_apis)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DuwpgoQJTzE-" + }, + "outputs": [], + "source": [ + "tf.math.abs([100.0, 4.0]) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UTcoR1xvR39k" + }, + "source": [ + "##Effects of turning on the new type promotion\n", + "\n", + "Below is a non-exhaustive list of changes that result from turning on the new type promotion.\n", + "\n", + "* More consistent and predictable promotion results.\n", + "* Reduced risk of bit-widening.\n", + "* `tf.Tensor` mathematical dunder methods use new type promotion.\n", + "* `tf.constant` can return `WeakTensor`.\n", + "* `tf.constant` allows implicit conversions when a Tensor input with a dtype different from the `dtype` arg is passed in.\n", + "* `tf.Variable` in-place ops (`assign`, `assign-add`, `assign-sub`) allow implicit conversions.\n", + "* `tnp.array(1)` and `tnp.array(1.0)` returns 32-bit WeakTensor.\n", + "* `WeakTensor`s will be created and used for [WeakTensor-supporting unary and binary API](#weak_tensor_apis)'s.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KyvonwYcsFX2" + }, + "source": [ + "### More consistent and predictable promotion results\n", + "\n", + "Using a [lattice-based system](#lattice_system_design) allows the new type promotion to produce consistent and predictable type promotion results." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q0Z1njfb7lRa" + }, + "source": [ + "#### Old Type Promotion\n", + "\n", + "Changing the order of operations produces inconsistent results using old type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M1Ca9v4m7z8e" + }, + "outputs": [], + "source": [ + "# Setup\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"legacy\")\n", + "a = np.array(1, dtype=np.int8)\n", + "b = tf.constant(1)\n", + "c = np.array(1, dtype=np.float16)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WwhTzJ-a4rTc" + }, + "outputs": [], + "source": [ + "# (a + b) + c throws an InvalidArgumentError.\n", + "try:\n", + " tf.add(tf.add(a, b), c)\n", + "except tf.errors.InvalidArgumentError as e:\n", + " print(f'{type(e)}: {e}') # InvalidArgumentError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3qDgVYn7ezT" + }, + "outputs": [], + "source": [ + "# (b + a) + c returns an i32 result.\n", + "tf.add(tf.add(b, a), c) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YMH1skEs7oI5" + }, + "source": [ + "#### New Type Promotion\n", + "\n", + "New type promotion produces consistent results regardless of the order." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BOHyJJ8z8uCN" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = np.array(1, dtype=np.int8)\n", + "b = tf.constant(1)\n", + "c = np.array(1, dtype=np.float16)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZUKU70jf7E1l" + }, + "outputs": [], + "source": [ + "# (a + b) + c returns a f16 result.\n", + "tf.add(tf.add(a, b), c) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YOEycjFx7qDn" + }, + "outputs": [], + "source": [ + "# (b + a) + c also returns a f16 result.\n", + "tf.add(tf.add(b, a), c) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FpGMkm6aJsn6" + }, + "source": [ + "### Reduced risk of bit-widening" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JxV2AL-U9Grg" + }, + "source": [ + "#### Old Type Promotion\n", + "\n", + "The old type promotion often resulted in 64-bit results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7L1pxyvn9MlP" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"legacy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zMJVFdWf4XHp" + }, + "outputs": [], + "source": [ + "np.array(3.2, np.float16) + tf.constant(1, tf.int8) + tf.constant(50) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fBhUH_wD9Is7" + }, + "source": [ + "#### New Type Promotion\n", + "\n", + "The new type promotion returns results with the minimal number of bits necessary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aJsj2ZyI9T9Y" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jj0N_Plp4X9l" + }, + "outputs": [], + "source": [ + "np.array(3.2, np.float16) + tf.constant(1, tf.int8) + tf.constant(50) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yKUx7xe-KZ5O" + }, + "source": [ + "### tf.Tensor mathematical dunder methods\n", + "\n", + "All `tf.Tensor` mathematical dunder methods will follow the new type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2c3icBUX4wNl" + }, + "outputs": [], + "source": [ + "-tf.constant(5) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ydJHQjid45s7" + }, + "outputs": [], + "source": [ + "tf.constant(5, tf.int16) - tf.constant(1, tf.float32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLbIjIvbKqcU" + }, + "source": [ + "### tf.Variable in-place ops\n", + "\n", + "Implicit conversions will be allowed in `tf.Variable` in-place ops.\n", + "\n", + "**Note**: Any promotion that results in a dtype that is different from the variable's original dtype is not allowed. This is because `tf.Variable` cannot change its dtype."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QsXhyK1h-i5S" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.Variable(10, tf.int32)\n", + "a.assign_add(tf.constant(5, tf.int16)) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PiA4H-otLDit" + }, + "source": [ + "### tf.constant implicit conversions\n", + "\n", + "In the old type promotion, `tf.constant` required an input Tensor to have the same dtype as the dtype argument. However, in the new type promotion, we implicitly convert the Tensor to the specified dtype." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ArrQ9Dj0_OR8" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, tf.int16)\n", + "tf.constant(a, tf.float32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WAcK_-XnLWaP" + }, + "source": [ + "### TF-NumPy Array\n", + "\n", + "`tnp.array` defaults to `i32*` and `f32*` for Python inputs using the new type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1pZnYNh_ahm" + }, + "outputs": [], + "source": [ + "tnp.array(1) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QoQl2PYP_fMT" + }, + "outputs": [], + "source": [ + "tnp.array(1.0) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wK5DpQ3Pz3k5" + }, + "source": [ + "## Input Type Inference\n", + "\n", + "This is how different inputs' types are inferred in the new type promotion.\n", + "\n", + "\n", + "* `tf.Tensor`: Since `tf.Tensor` has a dtype property, we don't do further inference.\n", + "* NumPy types: This includes types like `np.array(1)`, `np.int16(1)`, and `np.float64(1.0)`. Since NumPy inputs also have a dtype property, we take the dtype property as the result inference type. Note that NumPy defaults to `i64` and `f64`.\n", + "* Python scalars/Nested types: This includes types like `1`, `[1, 2, 3]`, and `(1.0, 2.0)`.\n", + " * Python `int` is inferred as `i32*`.\n", + " * Python `float` is inferred as `f32*`.\n", + " * Python `complex` is inferred as `c128*`.\n", + "* If the input doesn't fall into any of the above categories but has a dtype property, we take the dtype property as the result inference type." + ] + },
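+ { + "cell_type": "markdown", + "metadata": { + "id": "input-type-inference-example-md" + }, + "source": [ + "The following cell is a small illustrative sketch, not part of the original guide: assuming the `ALL` mode enabled above is still active, Python scalars should be inferred as 32-bit weak types, while NumPy inputs should keep their own dtypes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "input-type-inference-example-code" + }, + "outputs": [], + "source": [ + "# Illustrative sketch (assumption): check inferred dtypes under the new type promotion.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "print(tf.add(1, 2).dtype) # Python ints are inferred as i32*.\n", + "print(tf.add(1.0, 2.0).dtype) # Python floats are inferred as f32*.\n", + "print(tf.add(np.int64(1), 1).dtype) # A NumPy input keeps its i64 dtype." + ] + },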
+ { + "cell_type": "markdown", + "metadata": { + "id": "g_SPfalfSPgg" + }, + "source": [ + "# Further Reading\n", + "\n", + "The new type promotion closely resembles JAX-NumPy's type promotion. If you want to know more details about the new type promotion and the design choices, check out the resources below.\n", + "\n", + "* [JAX Type Promotion Semantics](https://jax.readthedocs.io/en/latest/type_promotion.html)\n", + "* [Design of Type Promotion Semantics for JAX](https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html)\n", + "* [Old TF-NumPy Promotion Semantics](https://www.tensorflow.org/guide/tf_numpy#type_promotion)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qg5xBbImT31S" + }, + "source": [ + "# References" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gjB0CVhVXBfW" + }, + "source": [ + "\n", + "\n", + "## WeakTensor-supporting APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_GVbqlN9aBS2" + }, + "source": [ + "Below is a list of APIs that support `WeakTensor`.\n", + "\n", + "For a unary op, this means that if an input with no user-specified type is passed in, it will return a `WeakTensor`.\n", + "\n", + "For a binary op, it will follow the promotion table [here](#promotion_table). It may or may not return a `WeakTensor` depending on the promotion result of the two inputs.\n", + "\n", + "**Note**: All mathematical operations (`+`, `-`, `*`, ...) are supported." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gi-G68Z8WN2P" + }, + "source": [ + "* `tf.bitwise.invert`\n", + "* `tf.clip_by_value`\n", + "* `tf.debugging.check_numerics`\n", + "* `tf.expand_dims`\n", + "* `tf.identity`\n", + "* `tf.image.adjust_brightness`\n", + "* `tf.image.adjust_gamma`\n", + "* `tf.image.extract_patches`\n", + "* `tf.image.random_brightness`\n", + "* `tf.image.stateless_random_brightness`\n", + "* `tf.linalg.diag`\n", + "* `tf.linalg.diag_part`\n", + "* `tf.linalg.matmul`\n", + "* `tf.linalg.matrix_transpose`\n", + "* `tf.linalg.tensor_diag_part`\n", + "* `tf.linalg.trace`\n", + "* `tf.math.abs`\n", + "* `tf.math.acos`\n", + "* `tf.math.acosh`\n", + "* `tf.math.add`\n", + "* `tf.math.angle`\n", + "* `tf.math.asin`\n", + "* `tf.math.asinh`\n", + "* `tf.math.atan`\n", + "* `tf.math.atanh`\n", + "* `tf.math.ceil`\n", + "* `tf.math.conj`\n", + "* `tf.math.cos`\n", + "* `tf.math.cosh`\n", + "* `tf.math.digamma`\n", + "* `tf.math.divide_no_nan`\n", + "* `tf.math.divide`\n", + "* `tf.math.erf`\n", + "* `tf.math.erfc`\n", + "* `tf.math.erfcinv`\n", + "* `tf.math.erfinv`\n", + "* `tf.math.exp`\n", + "* `tf.math.expm1`\n", + "* `tf.math.floor`\n", + "* `tf.math.floordiv`\n", + "* `tf.math.floormod`\n", + "* `tf.math.imag`\n", + "* `tf.math.lgamma`\n", + "* `tf.math.log1p`\n", + "* `tf.math.log_sigmoid`\n", + "* `tf.math.log`\n", + "* `tf.math.multiply_no_nan`\n", + "* `tf.math.multiply`\n", + "* `tf.math.ndtri`\n", + "* `tf.math.negative`\n", + "* `tf.math.pow`\n", + "* `tf.math.real`\n", + "* `tf.math.reciprocal_no_nan`\n", + "* `tf.math.reciprocal`\n", + "* `tf.math.reduce_euclidean_norm`\n", + "* `tf.math.reduce_logsumexp`\n", + "* `tf.math.reduce_max`\n", + "* `tf.math.reduce_mean`\n", + "* `tf.math.reduce_min`\n", + "* `tf.math.reduce_prod`\n", + "* `tf.math.reduce_std`\n", + "* `tf.math.reduce_sum`\n", + "* `tf.math.reduce_variance`\n", + "* `tf.math.rint`\n", + "* `tf.math.round`\n", + "* `tf.math.rsqrt`\n", + "* `tf.math.scalar_mul`\n", + "* `tf.math.sigmoid`\n", + "* `tf.math.sign`\n", + "* `tf.math.sin`\n", + "* `tf.math.sinh`\n", + "* `tf.math.softplus`\n", + "* `tf.math.special.bessel_i0`\n", + "* `tf.math.special.bessel_i0e`\n", + "* `tf.math.special.bessel_i1`\n", +
"* `tf.math.special.bessel_i1e`\n", + "* `tf.math.special.bessel_j0`\n", + "* `tf.math.special.bessel_j1`\n", + "* `tf.math.special.bessel_k0`\n", + "* `tf.math.special.bessel_k0e`\n", + "* `tf.math.special.bessel_k1`\n", + "* `tf.math.special.bessel_k1e`\n", + "* `tf.math.special.bessel_y0`\n", + "* `tf.math.special.bessel_y1`\n", + "* `tf.math.special.dawsn`\n", + "* `tf.math.special.expint`\n", + "* `tf.math.special.fresnel_cos`\n", + "* `tf.math.special.fresnel_sin`\n", + "* `tf.math.special.spence`\n", + "* `tf.math.sqrt`\n", + "* `tf.math.square`\n", + "* `tf.math.subtract`\n", + "* `tf.math.tan`\n", + "* `tf.math.tanh`\n", + "* `tf.nn.depth_to_space`\n", + "* `tf.nn.elu`\n", + "* `tf.nn.gelu`\n", + "* `tf.nn.leaky_relu`\n", + "* `tf.nn.log_softmax`\n", + "* `tf.nn.relu6`\n", + "* `tf.nn.relu`\n", + "* `tf.nn.selu`\n", + "* `tf.nn.softsign`\n", + "* `tf.nn.space_to_depth`\n", + "* `tf.nn.swish`\n", + "* `tf.ones_like`\n", + "* `tf.realdiv`\n", + "* `tf.reshape`\n", + "* `tf.squeeze`\n", + "* `tf.stop_gradient`\n", + "* `tf.transpose`\n", + "* `tf.truncatediv`\n", + "* `tf.truncatemod`\n", + "* `tf.zeros_like`\n", + "* `tf.experimental.numpy.abs`\n", + "* `tf.experimental.numpy.absolute`\n", + "* `tf.experimental.numpy.amax`\n", + "* `tf.experimental.numpy.amin`\n", + "* `tf.experimental.numpy.angle`\n", + "* `tf.experimental.numpy.arange`\n", + "* `tf.experimental.numpy.arccos`\n", + "* `tf.experimental.numpy.arccosh`\n", + "* `tf.experimental.numpy.arcsin`\n", + "* `tf.experimental.numpy.arcsinh`\n", + "* `tf.experimental.numpy.arctan`\n", + "* `tf.experimental.numpy.arctanh`\n", + "* `tf.experimental.numpy.around`\n", + "* `tf.experimental.numpy.array`\n", + "* `tf.experimental.numpy.asanyarray`\n", + "* `tf.experimental.numpy.asarray`\n", + "* `tf.experimental.numpy.ascontiguousarray`\n", + "* `tf.experimental.numpy.average`\n", + "* `tf.experimental.numpy.bitwise_not`\n", + "* `tf.experimental.numpy.cbrt`\n", + "* `tf.experimental.numpy.ceil`\n", + "* `tf.experimental.numpy.conj`\n", + "* `tf.experimental.numpy.conjugate`\n", + "* `tf.experimental.numpy.copy`\n", + "* `tf.experimental.numpy.cos`\n", + "* `tf.experimental.numpy.cosh`\n", + "* `tf.experimental.numpy.cumprod`\n", + "* `tf.experimental.numpy.cumsum`\n", + "* `tf.experimental.numpy.deg2rad`\n", + "* `tf.experimental.numpy.diag`\n", + "* `tf.experimental.numpy.diagflat`\n", + "* `tf.experimental.numpy.diagonal`\n", + "* `tf.experimental.numpy.diff`\n", + "* `tf.experimental.numpy.empty_like`\n", + "* `tf.experimental.numpy.exp2`\n", + "* `tf.experimental.numpy.exp`\n", + "* `tf.experimental.numpy.expand_dims`\n", + "* `tf.experimental.numpy.expm1`\n", + "* `tf.experimental.numpy.fabs`\n", + "* `tf.experimental.numpy.fix`\n", + "* `tf.experimental.numpy.flatten`\n", + "* `tf.experimental.numpy.flip`\n", + "* `tf.experimental.numpy.fliplr`\n", + "* `tf.experimental.numpy.flipud`\n", + "* `tf.experimental.numpy.floor`\n", + "* `tf.experimental.numpy.full_like`\n", + "* `tf.experimental.numpy.imag`\n", + "* `tf.experimental.numpy.log10`\n", + "* `tf.experimental.numpy.log1p`\n", + "* `tf.experimental.numpy.log2`\n", + "* `tf.experimental.numpy.log`\n", + "* `tf.experimental.numpy.max`\n", + "* `tf.experimental.numpy.mean`\n", + "* `tf.experimental.numpy.min`\n", + "* `tf.experimental.numpy.moveaxis`\n", + "* `tf.experimental.numpy.nanmean`\n", + "* `tf.experimental.numpy.negative`\n", + "* `tf.experimental.numpy.ones_like`\n", + "* `tf.experimental.numpy.positive`\n", + "* `tf.experimental.numpy.prod`\n", + "* 
`tf.experimental.numpy.rad2deg`\n", + "* `tf.experimental.numpy.ravel`\n", + "* `tf.experimental.numpy.real`\n", + "* `tf.experimental.numpy.reciprocal`\n", + "* `tf.experimental.numpy.repeat`\n", + "* `tf.experimental.numpy.reshape`\n", + "* `tf.experimental.numpy.rot90`\n", + "* `tf.experimental.numpy.round`\n", + "* `tf.experimental.numpy.signbit`\n", + "* `tf.experimental.numpy.sin`\n", + "* `tf.experimental.numpy.sinc`\n", + "* `tf.experimental.numpy.sinh`\n", + "* `tf.experimental.numpy.sort`\n", + "* `tf.experimental.numpy.sqrt`\n", + "* `tf.experimental.numpy.square`\n", + "* `tf.experimental.numpy.squeeze`\n", + "* `tf.experimental.numpy.std`\n", + "* `tf.experimental.numpy.sum`\n", + "* `tf.experimental.numpy.swapaxes`\n", + "* `tf.experimental.numpy.tan`\n", + "* `tf.experimental.numpy.tanh`\n", + "* `tf.experimental.numpy.trace`\n", + "* `tf.experimental.numpy.transpose`\n", + "* `tf.experimental.numpy.triu`\n", + "* `tf.experimental.numpy.vander`\n", + "* `tf.experimental.numpy.var`\n", + "* `tf.experimental.numpy.zeros_like`" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "tf_numpy_type_promotion.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/tpu.ipynb b/site/en/guide/tpu.ipynb index f64450ba04c..49eee544bec 100644 --- a/site/en/guide/tpu.ipynb +++ b/site/en/guide/tpu.ipynb @@ -6,7 +6,7 @@ "id": "Tce3stUlHN0L" }, "source": [ - "##### Copyright 2018 The TensorFlow Authors.\n" + "##### Copyright 2024 The TensorFlow Authors.\n" ] }, { @@ -61,7 +61,9 @@ "id": "Ys81cOhXOWUP" }, "source": [ - "Before you run this Colab notebook, make sure that your hardware accelerator is a TPU by checking your notebook settings: **Runtime** > **Change runtime type** > **Hardware accelerator** > **TPU**." + "This guide demonstrates how to perform basic training on [Tensor Processing Units (TPUs)](https://cloud.google.com/tpu/) and TPU Pods, a collection of TPU devices connected by dedicated high-speed network interfaces, with `tf.keras` and custom training loops.\n", + "\n", + "TPUs are Google's custom-developed application-specific integrated circuits (ASICs) used to accelerate machine learning workloads. They are available through [Google Colab](https://colab.research.google.com/), the [TPU Research Cloud](https://sites.research.google/trc/), and [Cloud TPU](https://cloud.google.com/tpu)." ] }, { @@ -73,6 +75,17 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "ebf7f8489bb7" + }, + "source": [ + "Before you run this Colab notebook, make sure that your hardware accelerator is a TPU by checking your notebook settings: **Runtime** > **Change runtime type** > **Hardware accelerator** > **TPU v2**.\n", + "\n", + "Import some necessary libraries, including TensorFlow Datasets:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -95,7 +108,7 @@ "source": [ "## TPU initialization\n", "\n", - "TPUs are typically Cloud TPU workers, which are different from the local process running the user's Python program. Thus, you need to do some initialization work to connect to the remote cluster and initialize the TPUs. Note that the `tpu` argument to `tf.distribute.cluster_resolver.TPUClusterResolver` is a special address just for Colab. If you are running your code on Google Compute Engine (GCE), you should instead pass in the name of your Cloud TPU." 
+ "TPUs are typically [Cloud TPU](https://cloud.google.com/tpu/docs/) workers, which are different from the local process running the user's Python program. Thus, you need to do some initialization work to connect to the remote cluster and initialize the TPUs. Note that the `tpu` argument to `tf.distribute.cluster_resolver.TPUClusterResolver` is a special address just for Colab. If you are running your code on Google Compute Engine (GCE), you should instead pass in the name of your Cloud TPU." ] }, { @@ -115,7 +128,7 @@ }, "outputs": [], "source": [ - "resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')\n", + "resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')\n", "tf.config.experimental_connect_to_cluster(resolver)\n", "# This is the TPU initialization code that has to be at the beginning.\n", "tf.tpu.experimental.initialize_tpu_system(resolver)\n", @@ -159,7 +172,7 @@ "source": [ "## Distribution strategies\n", "\n", - "Usually you run your model on multiple TPUs in a data-parallel way. To distribute your model on multiple TPUs (or other accelerators), TensorFlow offers several distribution strategies. You can replace your distribution strategy and the model will run on any given (TPU) device. Check the [distribution strategy guide](./distributed_training.ipynb) for more information." + "Usually, you run your model on multiple TPUs in a data-parallel way. To distribute your model on multiple TPUs (as well as multiple GPUs or multiple machines), TensorFlow offers the `tf.distribute.Strategy` API. You can replace your distribution strategy and the model will run on any given (TPU) device. Learn more in the [Distributed training with TensorFlow](./distributed_training.ipynb) guide." ] }, { @@ -168,6 +181,8 @@ "id": "DcDPMZs-9uLJ" }, "source": [ + "Using the `tf.distribute.TPUStrategy` option implements synchronous distributed training. TPUs provide their own implementation of efficient all-reduce and other collective operations across multiple TPU cores, which are used in `TPUStrategy`.\n", + "\n", "To demonstrate this, create a `tf.distribute.TPUStrategy` object:" ] }, @@ -188,7 +203,7 @@ "id": "JlaAmswWPsU6" }, "source": [ - "To replicate a computation so it can run in all TPU cores, you can pass it into the `strategy.run` API. Below is an example that shows all cores receiving the same inputs `(a, b)` and performing matrix multiplication on each core independently. The outputs will be the values from all the replicas." + "To replicate a computation so it can run in all TPU cores, you can pass it into the `Strategy.run` API. Below is an example that shows all cores receiving the same inputs `(a, b)` and performing matrix multiplication on each core independently. The outputs will be the values from all the replicas." ] }, { @@ -216,7 +231,7 @@ "source": [ "## Classification on TPUs\n", "\n", - "Having covered the basic concepts, consider a more concrete example. This section demonstrates how to use the distribution strategy—`tf.distribute.TPUStrategy`—to train a Keras model on a Cloud TPU.\n" + "Having covered the basic concepts, consider a more concrete example. This section demonstrates how to use the distribution strategy—`tf.distribute.TPUStrategy`—to train a Keras model on a Cloud TPU." ] }, { @@ -227,7 +242,7 @@ "source": [ "### Define a Keras model\n", "\n", - "Start with a definition of a `Sequential` Keras model for image classification on the MNIST dataset using Keras. 
It's no different than what you would use if you were training on CPUs or GPUs. Note that Keras model creation needs to be inside `strategy.scope`, so the variables can be created on each TPU device. Other parts of the code are not necessary to be inside the strategy scope." + "Start with a definition of a [`Sequential` Keras model](https://www.tensorflow.org/guide/keras/sequential_model) for image classification on the MNIST dataset. It's no different than what you would use if you were training on CPUs or GPUs. Note that Keras model creation needs to be inside the `Strategy.scope`, so the variables can be created on each TPU device. Other parts of the code are not necessary to be inside the `Strategy` scope." ] }, { @@ -239,13 +254,32 @@ "outputs": [], "source": [ "def create_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " return tf.keras.Sequential(\n", - " [tf.keras.layers.Conv2D(256, 3, activation='relu', input_shape=(28, 28, 1)),\n", - " tf.keras.layers.Conv2D(256, 3, activation='relu'),\n", + " [tf.keras.layers.Conv2D(256, 3, input_shape=(28, 28, 1),\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Conv2D(256, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(256, activation='relu'),\n", - " tf.keras.layers.Dense(128, activation='relu'),\n", - " tf.keras.layers.Dense(10)])" + " tf.keras.layers.Dense(256,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(128,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10,\n", + " kernel_regularizer=regularizer)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h-2qaXgfyONQ" + }, + "source": [ + "This model puts L2 regularization terms on the weights of each layer, so that the custom training loop below can show how you pick them up from `Model.losses`." ] }, { @@ -256,9 +290,9 @@ "source": [ "### Load the dataset\n", "\n", - "Efficient use of the `tf.data.Dataset` API is critical when using a Cloud TPU, as it is impossible to use the Cloud TPUs unless you can feed them data quickly enough. You can learn more about dataset performance in the [Input pipeline performance guide](./data_performance.ipynb).\n", + "Efficient use of the `tf.data.Dataset` API is critical when using a Cloud TPU. You can learn more about dataset performance in the [Input pipeline performance guide](./data_performance.ipynb).\n", "\n", - "For all but the simplest experiments (using `tf.data.Dataset.from_tensor_slices` or other in-graph data), you need to store all data files read by the Dataset in Google Cloud Storage (GCS) buckets.\n", + "If you are using [TPU Nodes](https://cloud.google.com/tpu/docs/managing-tpus-tpu-vm), you need to store all data files read by the TensorFlow `Dataset` in [Google Cloud Storage (GCS) buckets](https://cloud.google.com/tpu/docs/storage-buckets). If you are using [TPU VMs](https://cloud.google.com/tpu/docs/users-guide-tpu-vm), you can store data wherever you like. For more information on TPU Nodes and TPU VMs, refer to the [TPU System Architecture](https://cloud.google.com/tpu/docs/system-architecture-tpu-vm) documentation.\n", "\n", "For most use cases, it is recommended to convert your data into the `TFRecord` format and use a `tf.data.TFRecordDataset` to read it. Check the [TFRecord and tf.Example tutorial](../tutorials/load_data/tfrecord.ipynb) for details on how to do this. 
It is not a hard requirement and you can use other dataset readers, such as `tf.data.FixedLengthRecordDataset` or `tf.data.TextLineDataset`.\n", "\n", @@ -266,7 +300,7 @@ "\n", "Regardless of the data format used, it is strongly recommended that you use large files on the order of 100MB. This is especially important in this networked setting, as the overhead of opening a file is significantly higher.\n", "\n", - "As shown in the code below, you should use the `tensorflow_datasets` module to get a copy of the MNIST training and test data. Note that `try_gcs` is specified to use a copy that is available in a public GCS bucket. If you don't specify this, the TPU will not be able to access the downloaded data. " + "As shown in the code below, you should use the Tensorflow Datasets `tfds.load` module to get a copy of the MNIST training and test data. Note that `try_gcs` is specified to use a copy that is available in a public GCS bucket. If you don't specify this, the TPU will not be able to access the downloaded data." ] }, { @@ -311,7 +345,7 @@ "source": [ "### Train the model using Keras high-level APIs\n", "\n", - "You can train your model with Keras `fit` and `compile` APIs. There is nothing TPU-specific in this step—you write the code as if you were using mutliple GPUs and a `MirroredStrategy` instead of the `TPUStrategy`. You can learn more in the [Distributed training with Keras](https://www.tensorflow.org/tutorials/distribute/keras) tutorial." + "You can train your model with Keras `Model.fit` and `Model.compile` APIs. There is nothing TPU-specific in this step—you write the code as if you were using multiple GPUs and a `MirroredStrategy` instead of the `TPUStrategy`. You can learn more in the [Distributed training with Keras](../tutorials/distribute/keras.ipynb) tutorial." ] }, { @@ -338,7 +372,7 @@ "model.fit(train_dataset,\n", " epochs=5,\n", " steps_per_epoch=steps_per_epoch,\n", - " validation_data=test_dataset, \n", + " validation_data=test_dataset,\n", " validation_steps=validation_steps)" ] }, @@ -348,7 +382,7 @@ "id": "8hSGBIYtUugJ" }, "source": [ - "To reduce Python overhead and maximize the performance of your TPU, pass in the argument—`steps_per_execution`—to `Model.compile`. In this example, it increases throughput by about 50%:" + "To reduce Python overhead and maximize the performance of your TPU, pass in the `steps_per_execution` argument to Keras `Model.compile`. In this example, it increases throughput by about 50%:" ] }, { @@ -382,7 +416,7 @@ "source": [ "### Train the model using a custom training loop\n", "\n", - "You can also create and train your model using `tf.function` and `tf.distribute` APIs directly. You can use the `strategy.experimental_distribute_datasets_from_function` API to distribute the dataset given a dataset function. Note that in the example below the batch size passed into the dataset is the per-replica batch size instead of the global batch size. To learn more, check out the [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training) tutorial.\n" + "You can also create and train your model using `tf.function` and `tf.distribute` APIs directly. You can use the `Strategy.distribute_datasets_from_function` API to distribute the `tf.data.Dataset` given a dataset function. Note that in the example below the batch size passed into the `Dataset` is the per-replica batch size instead of the global batch size. 
To learn more, check out the [Custom training with `tf.distribute.Strategy`](../tutorials/distribute/custom_training.ipynb) tutorial.\n" ] }, { @@ -391,7 +425,7 @@ "id": "DxdgXPAL6iFE" }, "source": [ - "First, create the model, datasets and tf.functions:" + "First, create the model, datasets and `tf.function`s:" ] }, { @@ -402,8 +436,8 @@ }, "outputs": [], "source": [ - "# Create the model, optimizer and metrics inside the strategy scope, so that the\n", - "# variables can be mirrored on each device.\n", + "# Create the model, optimizer and metrics inside the `tf.distribute.Strategy`\n", + "# scope, so that the variables can be mirrored on each device.\n", "with strategy.scope():\n", " model = create_model()\n", " optimizer = tf.keras.optimizers.Adam()\n", @@ -411,11 +445,11 @@ " training_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", " 'training_accuracy', dtype=tf.float32)\n", "\n", - "# Calculate per replica batch size, and distribute the datasets on each TPU\n", - "# worker.\n", + "# Calculate per replica batch size, and distribute the `tf.data.Dataset`s\n", + "# on each TPU worker.\n", "per_replica_batch_size = batch_size // strategy.num_replicas_in_sync\n", "\n", - "train_dataset = strategy.experimental_distribute_datasets_from_function(\n", + "train_dataset = strategy.distribute_datasets_from_function(\n", " lambda _: get_dataset(per_replica_batch_size, is_training=True))\n", "\n", "@tf.function\n", @@ -427,9 +461,13 @@ " images, labels = inputs\n", " with tf.GradientTape() as tape:\n", " logits = model(images, training=True)\n", - " loss = tf.keras.losses.sparse_categorical_crossentropy(\n", + " per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", " labels, logits, from_logits=True)\n", - " loss = tf.nn.compute_average_loss(loss, global_batch_size=batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + "\n", " grads = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))\n", " training_loss.update_state(loss * strategy.num_replicas_in_sync)\n", @@ -463,7 +501,7 @@ "\n", " for step in range(steps_per_epoch):\n", " train_step(train_iterator)\n", - " print('Current step: {}, training loss: {}, accuracy: {}%'.format(\n", + " print('Current step: {}, training loss: {}, training accuracy: {}%'.format(\n", " optimizer.iterations.numpy(),\n", " round(float(training_loss.result()), 4),\n", " round(float(training_accuracy.result()) * 100, 2)))\n", @@ -479,9 +517,9 @@ "source": [ "### Improving performance with multiple steps inside `tf.function`\n", "\n", - "You can improve the performance by running multiple steps within a `tf.function`. This is achieved by wrapping the `strategy.run` call with a `tf.range` inside `tf.function`, and AutoGraph will convert it to a `tf.while_loop` on the TPU worker.\n", + "You can improve the performance by running multiple steps within a `tf.function`. This is achieved by wrapping the `Strategy.run` call with a `tf.range` inside `tf.function`, and AutoGraph will convert it to a `tf.while_loop` on the TPU worker. You can learn more about `tf.function`s in the [Better performance with `tf.function`](./function.ipynb) guide.\n", "\n", - "Despite the improved performance, there are tradeoffs with this method compared to running a single step inside `tf.function`. 
Running multiple steps in a `tf.function` is less flexible—you cannot run things eagerly or arbitrary Python code within the steps.\n" + "Despite the improved performance, there are tradeoffs with this method compared to running a single step inside a `tf.function`. Running multiple steps in a `tf.function` is less flexible—you cannot run things eagerly or arbitrary Python code within the steps.\n" ] }, { @@ -501,9 +539,12 @@ " images, labels = inputs\n", " with tf.GradientTape() as tape:\n", " logits = model(images, training=True)\n", - " loss = tf.keras.losses.sparse_categorical_crossentropy(\n", + " per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", " labels, logits, from_logits=True)\n", - " loss = tf.nn.compute_average_loss(loss, global_batch_size=batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", " grads = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))\n", " training_loss.update_state(loss * strategy.num_replicas_in_sync)\n", @@ -512,11 +553,11 @@ " for _ in tf.range(steps):\n", " strategy.run(step_fn, args=(next(iterator),))\n", "\n", - "# Convert `steps_per_epoch` to `tf.Tensor` so the `tf.function` won't get \n", + "# Convert `steps_per_epoch` to `tf.Tensor` so the `tf.function` won't get\n", "# retraced if the value changes.\n", "train_multiple_steps(train_iterator, tf.convert_to_tensor(steps_per_epoch))\n", "\n", - "print('Current step: {}, training loss: {}, accuracy: {}%'.format(\n", + "print('Current step: {}, training loss: {}, training accuracy: {}%'.format(\n", " optimizer.iterations.numpy(),\n", " round(float(training_loss.result()), 4),\n", " round(float(training_accuracy.result()) * 100, 2)))" @@ -530,19 +571,27 @@ "source": [ "## Next steps\n", "\n", - "- [Google Cloud TPU documentation](https://cloud.google.com/tpu/docs/): How to set up and run a Google Cloud TPU.\n", + "To learn more about Cloud TPUs and how to use them:\n", + "\n", + "- [Google Cloud TPU](https://cloud.google.com/tpu): The Google Cloud TPU homepage.\n", + "- [Google Cloud TPU documentation](https://cloud.google.com/tpu/docs/): Google Cloud TPU documentation, which includes:\n", + " - [Introduction to Cloud TPU](https://cloud.google.com/tpu/docs/intro-to-tpu): An overview of working with Cloud TPUs.\n", + " - [Cloud TPU quickstarts](https://cloud.google.com/tpu/docs/quick-starts): Quickstart introductions to working with Cloud TPU VMs using TensorFlow and other main machine learning frameworks.\n", "- [Google Cloud TPU Colab notebooks](https://cloud.google.com/tpu/docs/colabs): End-to-end training examples.\n", "- [Google Cloud TPU performance guide](https://cloud.google.com/tpu/docs/performance-guide): Enhance Cloud TPU performance further by adjusting Cloud TPU configuration parameters for your application\n", - "- [Distributed training with TensorFlow](./distributed_training.ipynb): How to use distribution strategies—including `tf.distribute.TPUStrategy`—with examples showing best practices." + "- [Distributed training with TensorFlow](./distributed_training.ipynb): How to use distribution strategies—including `tf.distribute.TPUStrategy`—with examples showing best practices.\n", + "- TPU embeddings: TensorFlow includes specialized support for training embeddings on TPUs via `tf.tpu.experimental.embedding`. 
In addition, [TensorFlow Recommenders](https://www.tensorflow.org/recommenders) has `tfrs.layers.embedding.TPUEmbedding`. Embeddings provide efficient and dense representations, capturing complex similarities and relationships between features. TensorFlow's TPU-specific embedding support allows you to train embeddings that are larger than the memory of a single TPU device, and to use sparse and ragged inputs on TPUs.\n", + "- [TPU Research Cloud (TRC)](https://sites.research.google/trc/about/): TRC enables researchers to apply for access to a cluster of more than 1,000 Cloud TPU devices.\n" ] } ], "metadata": { "accelerator": "TPU", "colab": { - "collapsed_sections": [], "name": "tpu.ipynb", - "toc_visible": true + "toc_visible": true, + "machine_shape": "hm", + "gpuType": "V28" }, "kernelspec": { "display_name": "Python 3", @@ -551,4 +600,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/site/en/guide/variable.ipynb b/site/en/guide/variable.ipynb index 1538218e914..868ee9119e2 100644 --- a/site/en/guide/variable.ipynb +++ b/site/en/guide/variable.ipynb @@ -166,7 +166,7 @@ "source": [ "print(\"A variable:\", my_variable)\n", "print(\"\\nViewed as a tensor:\", tf.convert_to_tensor(my_variable))\n", - "print(\"\\nIndex of highest value:\", tf.argmax(my_variable))\n", + "print(\"\\nIndex of highest value:\", tf.math.argmax(my_variable))\n", "\n", "# This creates a new tensor; it does not reshape the variable.\n", "print(\"\\nCopying and reshaping: \", tf.reshape(my_variable, [1,4]))" @@ -298,7 +298,7 @@ "\n", "However, you can override this. In this snippet, place a float tensor and a variable on the CPU, even if a GPU is available. By turning on device placement logging (see [Setup](#scrollTo=xZoJJ4vdvTrD)), you can see where the variable is placed. \n", "\n", - "Note: Although manual placement works, using [distribution strategies](distributed_training) can be a more convenient and scalable way to optimize your computation.\n", + "Note: Although manual placement works, using [distribution strategies](distributed_training.ipynb) can be a more convenient and scalable way to optimize your computation.\n", "\n", "If you run this notebook on different backends with and without a GPU you will see different logging. *Note that logging device placement must be turned on at the start of the session.*" ] @@ -359,7 +359,7 @@ "source": [ "Note: Because `tf.config.set_soft_device_placement` is turned on by default, even if you run this code on a device without a GPU, it will still run. The multiplication step will happen on the CPU.\n", "\n", - "For more on distributed training, see [our guide](distributed_training)." + "For more on distributed training, refer to the [guide](distributed_training.ipynb)." ] }, { diff --git a/site/en/guide/versions.md b/site/en/guide/versions.md index b20eea717bc..5b1206cc5f4 100644 --- a/site/en/guide/versions.md +++ b/site/en/guide/versions.md @@ -6,10 +6,11 @@ to modify TensorFlow while preserving compatibility. ## Semantic versioning 2.0 -TensorFlow follows Semantic Versioning 2.0 ([semver](http://semver.org)) for its -public API. Each release version of TensorFlow has the form `MAJOR.MINOR.PATCH`. -For example, TensorFlow version 1.2.3 has `MAJOR` version 1, `MINOR` version 2, -and `PATCH` version 3. Changes to each number have the following meaning: +TensorFlow mostly follows Semantic Versioning 2.0 ([semver](http://semver.org)) +for its public API. Each release version of TensorFlow has the form +`MAJOR.MINOR.PATCH`. 
For example, TensorFlow version 1.2.3 has `MAJOR` version +1, `MINOR` version 2, and `PATCH` version 3. Changes to each number have the +following meaning: * **MAJOR**: Potentially backwards incompatible changes. Code and data that worked with a previous major release will not necessarily work with the new @@ -22,6 +23,10 @@ and `PATCH` version 3. Changes to each number have the following meaning: data that worked with a previous minor release *and* which depends only on the non-experimental public API will continue to work unchanged. For details on what is and is not the public API, see [What is covered](#what_is_covered). + Note that TensorFlow sometimes makes breaking changes in new minor releases, + where the impact is expected to be minor. For examples of these kinds of + changes, see the "Breaking Changes" sections for past minor releases at + https://github.com/tensorflow/tensorflow/releases. * **PATCH**: Backwards compatible bug fixes. @@ -34,44 +39,153 @@ release 0.12.1. However, release 1.1.1 was backwards *compatible* with release Only the public APIs of TensorFlow are backwards compatible across minor and patch versions. The public APIs consist of -* All the documented [Python](../api_docs/python) functions and classes in the - `tensorflow` module and its submodules, except for +* All the documented [Python](https://www.tensorflow.org/api_docs/python) + functions and classes in the `tensorflow` module and its submodules, except + for + + * Private symbols: any function, class, etc., whose name start with `_` + * Experimental and `tf.contrib` symbols, see [below](#not_covered) for + details. + + Note that the code in the `examples/` and `tools/` directories is not + reachable through the `tensorflow` Python module and is thus not covered by + the compatibility guarantee. + + If a symbol is available through the `tensorflow` Python module or its + submodules, but is not documented, then it is **not** considered part of the + public API. + +* The compatibility API (in Python, the `tf.compat` module). At major + versions, we may release utilities and additional endpoints to help users + with the transition to a new major version. These API symbols are deprecated + and not supported (i.e., we will not add any features, and we will not fix + bugs other than to fix vulnerabilities), but they do fall under our + compatibility guarantees. 
+ +* The TensorFlow C API: + + * [tensorflow/c/c_api.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h) + +* The following protocol buffer files: + + * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) + * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) + * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) + * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) + * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) + * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) + * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) + * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) + * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) + * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) + + + +## Separate version number for TensorFlow Lite + +Currently TensorFlow Lite is distributed as a part of TensorFlow. However, we +reserve the right to in future release changes to the TensorFlow Lite APIs on a +different schedule than for the other TensorFlow APIs, or even to move +TensorFlow Lite into a separate source distribution and/or a separate source +repository than TensorFlow. + +Because of this, we use a different version number for TensorFlow Lite +(`TFLITE_VERSION_STRING` in `tensorflow/lite/version.h`, and `TfLiteVersion()` +in `tensorflow/lite/c/c_api.h`) than for TensorFlow (`TF_VERSION_STRING` in +`tensorflow/core/public/release_version.h`, and `TF_Version()` in +`tensorflow/c/c_api.h`). Currently, these two version numbers happen to have the +same value. But in future, they may diverge; for example, we may increment the +major version number for TensorFlow Lite without incrementing the major version +number for TensorFlow, or vice versa. + +The API surface that is covered by the TensorFlow Lite version number is +comprised of the following public APIs: + +* The TensorFlow Lite C API: + + * [tensorflow/lite/c/c_api.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api.h) + * [tensorflow/lite/c/c_api_types.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api_types.h). 
+ +* The TensorFlow Lite Android (Java/Kotlin) API: + + * In `org.tensorflow.lite`: + * [org.tensorflow.lite.TensorFlowLite](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/TensorFlowLite) + * [org.tensorflow.lite.InterpreterApi](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/InterpreterApi) + * [org.tensorflow.lite.Delegate](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/Delegate) + * [org.tensorflow.lite.DelegateFactory](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/DelegateFactory) + * [org.tensorflow.lite.Tensor](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/Tensor) + * [org.tensorflow.lite.DataType](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/DataType) + * [org.tensorflow.lite.RuntimeFlavor](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/RuntimeFlavor) + * In `org.tensorflow.lite.gpu`: + * [org.tensorflow.lite.gpu.GpuDelegate](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/gpu/GpuDelegate) + * [org.tensorflow.lite.gpu.GpuDelegateFactory](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/gpu/GpuDelegateFactory) + +* The TensorFlow Lite Objective-C APIs: + + * [tensorflow/lite/objc/apis/](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/objc/apis/) + * TFLCoreMLDelegate.h + * TFLDelegate.h + * TFLInterpreter.h + * TFLInterpreterOptions.h + * TFLMetalDelegate.h + * TFLQuantizationParameters.h + * TFLSignatureRunner.h + * TFLTensorFlowLite.h + * TFLTensor.h + +* The TensorFlow Lite Swift APIs: + + * [tensorflow/lite/swift/Sources/](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/swift/Sources/). + * CoreMLDelegate.swift + * Delegate.swift + * InterpreterError.swift + * Interpreter.swift + * MetalDelegate.swift + * Model.swift + * QuantizationParameters.swift + * SignatureRunnerError.swift + * SignatureRunner.swift + * TensorFlowLite.swift + * Tensor.swift + +Experimental symbols are not covered; see [below](#not_covered) for details. + +## Separate version number for TensorFlow Lite Extension APIs + +TensorFlow Lite provides C APIs for extending the TensorFlow Lite interpreter +with "custom ops", which provide user-defined operations in a graph, or +"delegates", which allow delegating the computation for a graph (or for a subset +of a graph) to a custom backend. These APIs, which we collectively call the +"TensorFlow Lite Extension APIs", require more intimate dependencies on some of +the details of the TensorFlow Lite implementation. + +We reserve the right to in future release changes to these APIs, potentially +including non-backwards-compatible changes, on a different schedule than for the +other TensorFlow Lite APIs. So we use a different version number for the +TensorFlow Lite Extension APIs than the version numbers for TensorFlow Lite or +TensorFlow (which were described in the previous section). We are introducing +some new APIs in TensorFlow Lite version 2.15 to get the TensorFlow Lite +Extension APIs version (`TFLITE_EXTENSION_APIS_VERSION_STRING` in +`tensorflow/lite/version.h`, and TfLiteExtensionApisVersion() in +`tensorflow/lite/c/c_api.h`). The version number for the TensorFlow Lite +Extension APIs is currently the same as the version number for TensorFlow and +TensorFlow Lite. 
But in future, they may diverge; for example, we may increment +the major version number for the TensorFlow Lite Extension APIs without +incrementing the major version number for TensorFlow Lite, or vice versa. + +The API surface that is covered by the TensorFlow Lite Extension APIs version +number is comprised of the following public APIs: + +* [tensorflow/lite/c/c_api_opaque.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api_opaque.h) +* [tensorflow/lite/c/common.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/common.h) +* [tensorflow/lite/c/builtin_op_data.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/builtin_op_data.h) +* [tensorflow/lite/builtin_ops.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/builtin_ops.h) + +Again, experimental symbols are not covered; see [below](#not_covered) for +details. + + - * Private symbols: any function, class, etc., whose name start with `_` - * Experimental and `tf.contrib` symbols, see [below](#not_covered) for - details. - - Note that the code in the `examples/` and `tools/` directories is not - reachable through the `tensorflow` Python module and is thus not covered by - the compatibility guarantee. - - If a symbol is available through the `tensorflow` Python module or its - submodules, but is not documented, then it is **not** considered part of the - public API. - -* The compatibility API (in Python, the `tf.compat` module). At major versions, - we may release utilities and additional endpoints to help users with the - transition to a new major version. These API symbols are deprecated and not - supported (i.e., we will not add any features, and we will not fix bugs - other than to fix vulnerabilities), but they do fall under our compatibility - guarantees. - -* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h). - -* The following protocol buffer files: - - * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) - * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) - * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) - * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) - * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) - * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) - * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) - * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) - * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) - * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) - - ## What is *not* covered Some parts of TensorFlow can change in backward incompatible ways at any point. 
@@ -82,21 +196,27 @@ These include: particular, the following are not covered by any compatibility guarantees: - any symbol in the `tf.contrib` module or its submodules; - - any symbol (module, function, argument, property, class, or constant) - whose name contains `experimental` or `Experimental`; or - - any symbol whose fully qualified name includes a module or class which - is itself experimental. This includes fields and submessages of any - protocol buffer called `experimental`. + - any symbol (module, function, argument, property, class, constant, type, + package, etc.) whose name contains `experimental` or `Experimental`; or + - any symbol whose fully qualified name includes a module or class or + package which is itself experimental. This includes fields and + submessages of any protocol buffer called `experimental`. * **Other languages**: TensorFlow APIs in languages other than Python and C, such as: - - [C++](../install/lang_c.md) (exposed through header files in - [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). - - [Java](../install/lang_java.md), - - [Go](../install/lang_go.md) + - [C++](../install/lang_c.ipynb) (exposed through header files in + [`tensorflow/cc/`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). + - [Java](../install/lang_java_legacy.md), + - [Go](https://github.com/tensorflow/build/blob/master/golang_install_guide/README.md) - [JavaScript](https://www.tensorflow.org/js) + and TensorFlow **Lite** APIs in languages other than Java/Kotlin, C, + Objective-C, and Swift, in particular + + - **C++** (exposed through header files in + [`tensorflow/lite/`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/)) + * **Details of composite ops:** Many public functions in Python expand to several primitive ops in the graph, and these details will be part of any graphs saved to disk as `GraphDef`s. These details may change for minor @@ -222,7 +342,8 @@ This section is relevant only when making incompatible changes to the `GraphDef` format, such as when adding ops, removing ops, or changing the functionality of existing ops. The previous section should suffice for most users. - + + ### Backward and partial forward compatibility @@ -253,7 +374,9 @@ guidelines for evolving `GraphDef` versions. There are different data versions for graphs and checkpoints. The two data formats evolve at different rates from each other and also at different rates from TensorFlow. Both versioning systems are defined in -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h). +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h) +and +[`core/public/release_version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/release_version.h). Whenever a new version is added, a note is added to the header detailing what changed and the date. @@ -353,7 +476,7 @@ existing producer scripts will not suddenly use the new functionality. 1. Add a new similar op named `SomethingV2` or similar and go through the process of adding it and switching existing Python wrappers to use it. 
To ensure forward compatibility use the checks suggested in - [compat.py](https://www.tensorflow.org/code/tensorflow/python/compat/compat.py) + [compat.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/compat/compat.py) when changing the Python wrappers. 2. Remove the old op (Can only take place with a major version change due to backward compatibility). diff --git a/site/en/hub/README.md b/site/en/hub/README.md deleted file mode 100644 index 3ca77792508..00000000000 --- a/site/en/hub/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Hub - -These docs are available here: https://github.com/tensorflow/hub/tree/master/docs diff --git a/site/en/hub/_book.yaml b/site/en/hub/_book.yaml new file mode 100644 index 00000000000..4a969d413bc --- /dev/null +++ b/site/en/hub/_book.yaml @@ -0,0 +1,85 @@ +upper_tabs: +# Tabs left of dropdown menu +- include: /_upper_tabs_left.yaml +- include: /api_docs/_upper_tabs_api.yaml +# Dropdown menu +- name: Resources + path: /resources + is_default: true + menu: + - include: /resources/_menu_toc.yaml + lower_tabs: + # Subsite tabs + other: + # [Guide] + - name: "Guide" + contents: + # TF Hub Platform overview. + - heading: Getting Started + - title: Overview + path: /hub/overview + - title: Installation + path: /hub/installation + - title: Community and support + path: /hub/community + # Python library usage information + - heading: Using the library + - title: Overview + path: /hub/lib_overview + - title: SavedModels for TensorFlow 2 + path: /hub/tf2_saved_model + - title: Caching model downloads + path: /hub/caching + - title: Migration to TF2 + path: /hub/migration_tf2 + - title: Model compatibility for TF1/TF2 + path: /hub/model_compatibility + - title: "Deprecated: TF1 Hub format" + path: /hub/tf1_hub_module + status: deprecated + # SavedModel APIs + - heading: Common SavedModel APIs + - title: Overview + path: /hub/common_saved_model_apis/index.md + - title: Reusable SavedModels (for all tasks) + path: /hub/reusable_saved_models + - title: Image tasks + path: /hub/common_saved_model_apis/images + - title: Text tasks + path: /hub/common_saved_model_apis/text + # Publishing models + - heading: Publishing models + - title: Publishing process + path: /hub/publish + - title: Data portability and deletion + path: /hub/portability_and_deletion + # Advanced developer info + - heading: Advanced developer info + - title: Model formats + path: /hub/model_formats + - title: Model hosting protocol + path: /hub/hosting + - title: Build from source + path: /hub/build_from_source + - title: Common issues + path: /hub/common_issues + - title: Contribute to TensorFlow Hub + path: /hub/contribute + # [Tutorials] + - name: Tutorials + path: /hub/tutorials + contents: + - include: /hub/tutorials/_toc.yaml + # [API] + - name: API + skip_translation: true + contents: + - include: /hub/api_docs/python/hub/_toc.yaml + # [Models] + - name: "Models ↗" + contents: + - title: Models + path: https://tfhub.dev + status: external + +- include: /_upper_tabs_right.yaml diff --git a/site/en/hub/_index.yaml b/site/en/hub/_index.yaml new file mode 100644 index 00000000000..00e67b15265 --- /dev/null +++ b/site/en/hub/_index.yaml @@ -0,0 +1,145 @@ +# This file is rendered on tensorflow.org/hub. +# ../README.md is rendered on github.com/tensorflow/hub. +# Both link to ./overview.md and ./*.md for detailed docs. 
+book_path: /hub/_book.yaml +project_path: /hub/_project.yaml +description: > + TensorFlow Hub is a repository of trained machine learning models ready for fine-tuning and + deployable anywhere. Reuse trained models like BERT and Faster R-CNN with just a few lines of code. +landing_page: + custom_css_path: /site-assets/css/style.css + rows: + - heading: TensorFlow Hub is a repository of trained machine learning models. + items: + - classname: + tfo-landing-row-item-code-block + devsite-landing-row-50 + description: > + TensorFlow Hub is a repository of trained machine learning models ready for fine-tuning and + deployable anywhere. Reuse trained models like BERT and Faster R-CNN with just a few lines of code. + list: + - heading: See the guide + description: Learn about how to use TensorFlow Hub and how it works. + path: /hub/overview + icon: + path: /hub/images/guide_basics.png + - heading: See tutorials + description: Tutorials show you end-to-end examples using TensorFlow Hub. + path: /hub/tutorials + icon: + path: /site-assets/images/marketing/learn/lite-pick.svg + - heading: See models + description: Find trained TF, TFLite, and TF.js models for your use case. + path: https://tfhub.dev + icon: + path: /site-assets/images/marketing/learn/js-run.svg + code_block: | +
+          !pip install --upgrade tensorflow_hub
+
+          import tensorflow_hub as hub
+
+          model = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2")
+          embeddings = model(["The rain in Spain.", "falls",
+                              "mainly", "In the plain!"])
+
+          print(embeddings.shape)  # (4, 128)
+        
+ - options: + - cards + - centered-header + heading: > +

Models + description: > + Find trained models from the TensorFlow community on TFHub.dev + items: + - heading: BERT + description: Check out BERT for NLP tasks including text classification and question answering. + path: https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 + image_path: /hub/images/bert.png + buttons: + - label: See the model + path: https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 + - heading: Object detection + description: Use the Faster R-CNN Inception ResNet V2 640x640 model for detecting objects in images. + path: https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1 + image_path: /hub/images/object_detection.png + buttons: + - label: See the model + path: https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1 + - heading: Style transfer + description: Transfer the style of one image to another using the image style transfer model. + path: https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2 + image_path: /hub/images/style_transfer.png + buttons: + - label: See the model + path: https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2 + - heading: On-device food classifier + description: Use this TFLite model to classify photos of food on a mobile device. + path: https://tfhub.dev/google/lite-model/aiy/vision/classifier/food_V1/1 + image_path: /hub/images/food.png + buttons: + - label: See the model + path: https://tfhub.dev/google/lite-model/aiy/vision/classifier/food_V1/1 + - options: + - cards + - centered-header + heading: > +

News & announcements + description: > + Check out our blog for more announcements and view the latest #TFHub updates on Twitter + items: + - heading: TensorFlow Hub for Real World Impact at Google I/O + youtube_id: BE5nkhFe3AE + description: > + Learn how you can use TensorFlow Hub to build ML solutions with real world impact. + buttons: + - label: Watch the video + path: https://www.youtube.com/watch?v=BE5nkhFe3AE + - heading: "On-device ML solutions" + description: > + To explore ML solutions for your mobile and web apps including TensorFlow Hub, visit the Google on-device machine learning page. + path: https://g.co/on-device-ml + image_path: /hub/images/odml.png + buttons: + - label: Visit the site + path: https://g.co/on-device-ml + - heading: "Making BERT Easier with Preprocessing Models From TensorFlow Hub" + description: > + TensorFlow Hub makes BERT simple to use with new preprocessing models. + path: https://blog.tensorflow.org/2020/12/making-bert-easier-with-preprocessing-models-from-tensorflow-hub.html + image_path: /hub/images/bert_preprocess_wide.png + buttons: + - label: Read the blog + path: https://blog.tensorflow.org/2020/12/making-bert-easier-with-preprocessing-models-from-tensorflow-hub.html + - heading: "From singing to musical scores: Estimating pitch with SPICE and Tensorflow Hub" + description: > + Learn how to use the SPICE model to automatically transcribe sheet music from live audio. + path: https://blog.tensorflow.org/2020/06/estimating-pitch-with-spice-and-tensorflow-hub.html + image_path: /hub/images/spice_blog.png + buttons: + - label: Read the blog + path: https://blog.tensorflow.org/2020/06/estimating-pitch-with-spice-and-tensorflow-hub.html + - options: + - cards + - centered-header + heading: > +

Community + description: Join the TensorFlow Hub community + items: + - heading: TensorFlow Hub on GitHub + icon: + path: /hub/images/github_icon.svg + path: https://github.com/tensorflow/hub + - heading: Contribute models + icon: + name: publish + path: /hub/publish + - options: + - cta + items: + - heading: Get started with TensorFlow Hub + buttons: + - label: Find trained models + path: https://tfhub.dev + classname: button diff --git a/site/en/hub/_redirects.yaml b/site/en/hub/_redirects.yaml new file mode 100644 index 00000000000..bee1cbec873 --- /dev/null +++ b/site/en/hub/_redirects.yaml @@ -0,0 +1,7 @@ +redirects: +- from: /hub/becoming_a_publisher + to: /hub/publish +- from: /hub/writing_model_documentation + to: /hub/writing_documentation#model +- from: /hub/creating_a_collection + to: /hub/writing_documentation#collection diff --git a/site/en/hub/build_from_source.md b/site/en/hub/build_from_source.md new file mode 100644 index 00000000000..42e19eb6208 --- /dev/null +++ b/site/en/hub/build_from_source.md @@ -0,0 +1,195 @@ + + + +# Creating the TensorFlow Hub pip package using Linux + +Note: This document is for developers interested in modifying TensorFlow Hub +itself. To _use_ TensorFlow Hub, see the [Install instructions](installation.md) + +If you make changes to TensorFlow Hub pip package, you will likely want to +rebuild the pip package from source to try out your changes. + +This requires: + +* Python +* TensorFlow +* Git +* [Bazel](https://docs.bazel.build/versions/master/install.html) + +Alternatively, if you install the protobuf compiler you can +[try out your changes without using bazel](#develop). + +## Setup a virtualenv {:#setup} + +### Activate virtualenv + +Install virtualenv if it's not installed already: + +```shell +~$ sudo apt-get install python-virtualenv +``` + +Create a virtual environment for the package creation: + +```shell +~$ virtualenv --system-site-packages tensorflow_hub_env +``` + +And activate it: + +```shell +~$ source ~/tensorflow_hub_env/bin/activate # bash, sh, ksh, or zsh +~$ source ~/tensorflow_hub_env/bin/activate.csh # csh or tcsh +``` + +### Clone the TensorFlow Hub repository. + +```shell +(tensorflow_hub_env)~/$ git clone https://github.com/tensorflow/hub +(tensorflow_hub_env)~/$ cd hub +``` + +## Test your changes + +### Run TensorFlow Hub's tests + +```shell +(tensorflow_hub_env)~/hub/$ bazel test tensorflow_hub:all +``` + +## Build and install the package + +### Build TensorFlow Hub pip packaging script + +To build a pip package for TensorFlow Hub: + +```shell +(tensorflow_hub_env)~/hub/$ bazel build tensorflow_hub/pip_package:build_pip_package +``` + +### Create the TensorFlow Hub pip package + +```shell +(tensorflow_hub_env)~/hub/$ bazel-bin/tensorflow_hub/pip_package/build_pip_package \ +/tmp/tensorflow_hub_pkg +``` + +### Install and test the pip package (optional) + +Run the following commands to install the pip package. + +```shell +(tensorflow_hub_env)~/hub/$ pip install /tmp/tensorflow_hub_pkg/*.whl +``` + +Test import TensorFlow Hub: + +```shell +(tensorflow_hub_env)~/hub/$ cd .. # exit the directory to avoid confusion +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +## "Developer" install (experimental) + + + +Warning: This approach to running TensorFlow is experimental, and not officially +supported by the TensorFlow Hub team. + +Building the package with bazel is the only officially supported method. 
However +if you are unfamiliar with bazel simpler to work with open source tools. For +that you can do a "developer install" of the package. + +This installation method allows you to install the working directory into your +python environment, so that ongoing changes are reflected when you import the +package. + +### Setup the repository + +First setup the virtualenv and repository, as described [above](#setup). + +### Install `protoc` + +Because TensorFlow Hub uses protobufs you will need the protobuf compiler to +create the necessary python `_pb2.py` files from the `.proto` files. + +#### On a Mac: + +``` +(tensorflow_hub_env)~/hub/$ brew install protobuf +``` + +#### On Linux + +``` +(tensorflow_hub_env)~/hub/$ sudo apt install protobuf-compiler +``` + +### Compile the `.proto` files + +Initially there are no `_pb2.py` files in the directory: + +``` +(tensorflow_hub_env)~/hub/$ ls -1 tensorflow_hub/*_pb2.py +``` + +Run `protoc` to create them: + +``` +(tensorflow_hub_env)~/hub/$ protoc -I=tensorflow_hub --python_out=tensorflow_hub tensorflow_hub/*.proto +(tensorflow_hub_env)~/hub/$ ls -1 tensorflow_hub/*_pb2.py +``` + +
+tensorflow_hub/image_module_info_pb2.py
+tensorflow_hub/module_attachment_pb2.py
+tensorflow_hub/module_def_pb2.py
+
+ +Note: Don't forget to recompile the `_pb2.py` files if you make changes to the +`.proto` definitions. + +### Import directly from the repository + +With the `_pb2.py` files in place, you can use try out your modifications +directly from the TensorFlow Hub directory: + +``` +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +### Install in "developer" mode + +Or to use this from outside the repository root, you can use the `setup.py +develop` installation: + +``` +(tensorflow_hub_env)~/hub/$ python tensorflow_hub/pip_package/setup.py develop +``` + +Now you can use your local changes in a regular python virtualenv, without the +need to rebuild and install the pip package for each new change: + +```shell +(tensorflow_hub_env)~/hub/$ cd .. # exit the directory to avoid confusion +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +## De-activate the virtualenv + +```shell +(tensorflow_hub_env)~/hub/$ deactivate +``` diff --git a/site/en/hub/caching.md b/site/en/hub/caching.md new file mode 100644 index 00000000000..678b2c22af0 --- /dev/null +++ b/site/en/hub/caching.md @@ -0,0 +1,86 @@ + +# Caching model downloads from TF Hub + +## Overview + +The `tensorflow_hub` library currently supports two modes for downloading +models. By default, a model is downloaded as a compressed archive and cached on +disk. Secondly, models can directly be read from remote storage into TensorFlow. +Either way, the calls to `tensorflow_hub` functions in the actual Python code +can and should continue to use the canonical tfhub.dev URLs of models, which are +portable across systems and navigable for documentation. In the rare case that +user code needs the actual filesystem location (after downloading and +decompressing, or after resolving a model handle into a filesystem path), +it can be obtained by the function `hub.resolve(handle)`. + +### Caching of compressed downloads + +The `tensorflow_hub` library by default caches models on the filesystem when +they have been downloaded from tfhub.dev (or other [hosting sites](hosting.md)) +and decompressed. This mode is recommended for most environments, except if disk +space is scarce but network bandwidth and latency are superb. + +The download location defaults to a local temporary directory but can be +customized by setting the environment variable `TFHUB_CACHE_DIR` (recommended) +or by passing the command-line flag `--tfhub_cache_dir`. The default cache +location `/tmp/tfhub_modules` (or whatever `os.path.join(tempfile.gettempdir(), +"tfhub_modules")` is evaluated to) should work in most cases. + +Users who prefer persistent caching across system reboots can instead set +`TFHUB_CACHE_DIR` to a location in their home directory. For example, a user of +the bash shell on a Linux system can add a line like the following to +`~/.bashrc`: + +```bash +export TFHUB_CACHE_DIR=$HOME/.cache/tfhub_modules +``` + +...restart the shell, and then this location will be used. When using a +persistent location, be aware that there is no automatic cleanup. + +### Reading from remote storage + +Users can instruct the `tensorflow_hub` library to directly read models from +remote storage (GCS) instead of downloading the models locally with: + +```shell +os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED" +``` + +or by setting the command-line flag `--tfhub_model_load_format` to +`UNCOMPRESSED`. 
This way, no caching directory is needed, which is especially +helpful in environments that provide little disk space but a fast internet +connection. + +### Running on TPU in Colab notebooks + +On [colab.research.google.com](https://colab.research.google.com), downloading +compressed models will conflict with the TPU runtime since the computation +workload is delegated to another machine that does not have access to the cache +location by default. There are two workarounds for this situation: + +#### 1) Use a GCS bucket that the TPU worker can access + +The easiest solution is to instruct the `tensorflow_hub` library to read the +models from TF Hub's GCS bucket as explained above. Users with their own GCS +bucket can instead specify a directory in their bucket as the cache location +with code like: + +```python +import os +os.environ["TFHUB_CACHE_DIR"] = "gs://my-bucket/tfhub-modules-cache" +``` + +...before calling the `tensorflow_hub` library. + +#### 2) Redirect all reads through the Colab host + +Another workaround is to redirect all reads (even of large variables) through +the Colab host: + +```python +load_options = +tf.saved_model.LoadOptions(experimental_io_device='/job:localhost') +reloaded_model = hub.load("https://tfhub.dev/...", options=load_options) +``` +**Note:** See more information regarding valid handles [here](tf2_saved_model.md#model_handles). diff --git a/site/en/hub/common_issues.md b/site/en/hub/common_issues.md new file mode 100644 index 00000000000..03ba4a62a8e --- /dev/null +++ b/site/en/hub/common_issues.md @@ -0,0 +1,148 @@ + +# Common issues + +If your issue is not listed here, please search the +[github issues](https://github.com/tensorflow/hub/issues) before filling a new +one. + +**Note:** This documentation uses TFhub.dev URL handles in examples. See more +information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +## TypeError: 'AutoTrackable' object is not callable + +```python +# BAD: Raises error +embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/1') +embed(['my text', 'batch']) +``` + +This error frequently arises when loading models in TF1 Hub format with the +`hub.load()` API in TF2. Adding the correct signature should fix this problem. +See the [TF-Hub migration guide for TF2](migration_tf2.md) for more details on +moving to TF2 and the use of models in TF1 Hub format in TF2. + +```python + +embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/1') +embed.signatures['default'](['my text', 'batch']) +``` + +## Cannot download a module + +In the process of using a module from an URL there are many errors that can show +up due to the network stack. Often this is a problem specific to the machine +running the code and not an issue with the library. Here is a list of the common +ones: + +* **"EOF occurred in violation of protocol"** - This issue is likely to be + generated if the installed python version does not support the TLS + requirements of the server hosting the module. Notably, python 2.7.5 is + known to fail resolving modules from tfhub.dev domain. **FIX**: Please + update to a newer python version. + +* **"cannot verify tfhub.dev's certificate"** - This issue is likely to be + generated if something on the network is trying to act as the dev gTLD. + Before .dev was used as a gTLD, developers and frameworks would sometimes + use .dev names to help testing code. **FIX:** Identify and reconfigure the + software that intercepts name resolution in the ".dev" domain. 
+ +* Failures to write to the cache directory `/tmp/tfhub_modules` (or similar): + see [Caching](caching.md) for what that is and how to change its location. + +If the above errors and fixes do not work, one can try to manually download a +module by simulating the protocol of attaching `?tf-hub-format=compressed` to +the URL to download a tar compressed file that has to be manually decompressed +into a local file. The path to the local file can then be used instead of the +URL. Here is a quick example: + +```bash +# Create a folder for the TF hub module. +$ mkdir /tmp/moduleA +# Download the module, and uncompress it to the destination folder. You might want to do this manually. +$ curl -L "https://tfhub.dev/google/universal-sentence-encoder/2?tf-hub-format=compressed" | tar -zxvC /tmp/moduleA +# Test to make sure it works. +$ python +> import tensorflow_hub as hub +> hub.Module("/tmp/moduleA") +``` + +## Running inference on a pre-initialized module + +If you are writing a Python program that applies a module many times on input +data, you can apply the following recipes. (Note: For serving requests in +production services, consider +[TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) or other +scalable, Python-free solutions.) + +Assuming your use-case model is **initialization** and subsequent **requests** +(for example Django, Flask, custom HTTP server, etc.), you can set-up the +serving as follows: + +### TF2 SavedModels + +* In the initialization part: + * Load the TF2.0 model. + +```python +import tensorflow_hub as hub + +embedding_fn = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4") +``` + +* In the request part: + * Use the embedding function to run inference. + +```python +embedding_fn(["Hello world"]) +``` + +This call of a tf.function is optimized for performance, see +[tf.function guide](https://www.tensorflow.org/guide/function). + +### TF1 Hub modules + +* In the initialization part: + * Build the graph with a **placeholder** - entry point into the graph. + * Initialize the session. + +```python +import tensorflow as tf +import tensorflow_hub as hub + +# Create graph and finalize (finalizing optional but recommended). +g = tf.Graph() +with g.as_default(): + # We will be feeding 1D tensors of text into the graph. + text_input = tf.placeholder(dtype=tf.string, shape=[None]) + embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2") + embedded_text = embed(text_input) + init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()]) +g.finalize() + +# Create session and initialize. +session = tf.Session(graph=g) +session.run(init_op) +``` + +* In the request part: + * Use the session to feed data into the graph through the placeholder. + +```python +result = session.run(embedded_text, feed_dict={text_input: ["Hello world"]}) +``` + +## Cannot change a model's dtype (e.g., float32 to bfloat16) + +TensorFlow's SavedModels (shared on TF Hub or otherwise) contain operations that +work on fixed data types (often, float32 for the weights and intermediate +activations of neural networks). These cannot be changed after the fact when +loading the SavedModel (but model publishers can choose to publish different +models with different data types). + +## Update a model version + +The documentation metadata of model versions can be updated. However, the +version's assets (model files) are immutable. If you want to change the model +assets, you can publish a newer version of the model. 
It's a good practice to +extend the documentation with a change log that describes what changed between +versions. diff --git a/site/en/hub/common_saved_model_apis/images.md b/site/en/hub/common_saved_model_apis/images.md new file mode 100644 index 00000000000..5413f0adc07 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/images.md @@ -0,0 +1,155 @@ + +# Common SavedModel APIs for Image Tasks + +This page describes how [TF2 SavedModels](../tf2_saved_model.md) for +image-related tasks should implement the +[Reusable SavedModel API](../reusable_saved_models.md). (This replaces the +[Common Signatures for Images](../common_signatures/images.md) for the +now-deprecated [TF1 Hub format](../tf1_hub_module).) + + + +## Image Feature Vector + +### Usage summary + +An **image feature vector** is a dense 1-D tensor that represents a whole image, +typically for use by a simple feed-forward classifier in the consumer model. (In +terms of classic CNNs, this is the bottleneck value after the spatial extent has +been pooled or flattened away, but before classification is done; for that, see +[image classification](#classification) below.) + +A Reusable SavedModel for image feature extraction has a `__call__` method on +the root object that maps a batch of images to a batch of feature vectors. It +can be used like so: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +images = ... # A batch of images with shape [batch_size, height, width, 3]. +features = obj(images) # A batch with shape [batch_size, num_features]. +``` + +In Keras, the equivalent is + +```python +features = hub.KerasLayer("path/to/model")(images) +``` + +The input follows the general convention for [input of images](#input). The +model documentation specifies the permissible range for `height` and `width` of +the input. + +The output is a single tensor of dtype `float32` and shape `[batch_size, +num_features]`. The `batch_size` is the same as in the input. `num_features` is +a module-specific constant independent of input size. + +### API details + +The [Reusable SavedModel API](../reusable_saved_models.md) also provides a list +of `obj.variables` (e.g., for initialization when not loading eagerly). + +A model that supports fine-tuning provides a list of `obj.trainable_variables`. +It may require you to pass `training=True` to execute in training mode (e.g., +for dropout). Some models allow optional arguments to override hyperparameters +(e.g., dropout rate; to be described in model documentation). The model may also +provide a list of `obj.regularization_losses`. For details, see the +[Reusable SavedModel API](../reusable_saved_models.md). + +In Keras, this is taken care of by `hub.KerasLayer`: initialize it with +`trainable=True` to enable fine-tuning, and (in the rare case that hparam +overrides apply) with `arguments=dict(some_hparam=some_value, ...))`. + +### Notes + +Applying dropout to the output features (or not) should be left to the model +consumer. The SavedModel itself should not perform dropout on the actual outputs +(even if it uses dropout internally in other places). 
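For illustration, a minimal consumer-side sketch might look as follows, with the consumer adding its own dropout and classification head on top of the feature vectors (the `"path/to/model"` handle, image size, dropout rate, and number of classes are placeholders, not prescribed by this API):

```python
import tensorflow as tf
import tensorflow_hub as hub

# Feature-vector SavedModel wrapped as a Keras layer; trainable=True enables fine-tuning.
feature_extractor = hub.KerasLayer("path/to/model", trainable=True)

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),  # assumed input size; see model docs
    feature_extractor,                                       # -> [batch_size, num_features]
    tf.keras.layers.Dropout(0.2),                            # dropout applied by the consumer
    tf.keras.layers.Dense(10),                               # task-specific logits
])
```
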
+ +### Examples + +Reusable SavedModels for image feature vectors are used in + +* the Colab tutorial + [Retraining an Image Classifier](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb), + + + +## Image Classification + +### Usage summary + +**Image classification** maps the pixels of an image to linear scores (logits) +for membership in the classes of a taxonomy _selected by the module publisher_. +This allows model consumers to to draw conclusions from the particular +classification learned by the publisher module. (For image classification with +a new set of classes, it is common to reuse an +[Image Feature Vector](#feature-vector) model with a new classifier instead.) + +A Reusable SavedModel for image classification has a `__call__` method on the +root object that maps a batch of images to a batch of logits. It can be used +like so: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +images = ... # A batch of images with shape [batch_size, height, width, 3]. +logits = obj(images) # A batch with shape [batch_size, num_classes]. +``` + +In Keras, the equivalent is + +```python +logits = hub.KerasLayer("path/to/model")(images) +``` + +The input follows the general convention for [input of images](#input). The +model documentation specifies the permissible range for `height` and `width` of +the input. + +The output `logits` is a single tensor of dtype `float32` and shape +`[batch_size, num_classes]`. The `batch_size` is the same as in the input. +`num_classes` is the number of classes in the classification, which is a +model-specific constant. + +The value `logits[i, c]` is a score predicting the membership of example `i` in +the class with index `c`. + +It depends on the underlying classification whether these scores are meant to be +used with softmax (for mutually exclusive classes), sigmoid (for orthogonal +classes), or something else. The module documentation should describe this, and +refer to a definition of the class indices. + +### API details + +The [Reusable SavedModel API](../reusable_saved_models.md) also provides a list +of `obj.variables` (e.g., for initialization when not loading eagerly). + +A model that supports fine-tuning provides a list of `obj.trainable_variables`. +It may require you to pass `training=True` to execute in training mode (e.g., +for dropout). Some models allow optional arguments to override hyperparameters +(e.g., dropout rate; to be described in model documentation). The model may also +provide a list of `obj.regularization_losses`. For details, see the +[Reusable SavedModel API](../reusable_saved_models.md). + +In Keras, this is taken care of by `hub.KerasLayer`: initialize it with +`trainable=True` to enable fine-tuning, and (in the rare case that hparam +overrides apply) with `arguments=dict(some_hparam=some_value, ...))`. + + + +## Image input + +This is common to all types of image models. + +A model that takes a batch of images as input accepts them as a dense 4-D tensor +of dtype `float32` and shape `[batch_size, height, width, 3]` whose elements are +RGB color values of pixels normalized to the range [0, 1]. This is what you get +from `tf.image.decode_*()` followed by `tf.image.convert_image_dtype(..., +tf.float32)`. + +The model accepts any `batch_size`. The model documentation specifies the +permissible range for `height` and `width`. The last dimension is fixed to 3 RGB +channels. 
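For example, a sketch of that input convention using standard TensorFlow ops (the file name and the 224×224 size are just placeholders; consult the model documentation for the permissible sizes):

```python
import tensorflow as tf

def load_image(path, height=224, width=224):
  """Decodes one image file into the float32 [0, 1] format described above."""
  data = tf.io.read_file(path)
  image = tf.image.decode_jpeg(data, channels=3)            # uint8 RGB
  image = tf.image.convert_image_dtype(image, tf.float32)   # rescales to [0, 1]
  return tf.image.resize(image, [height, width])

images = tf.stack([load_image("example.jpg")])  # shape [1, height, width, 3]
```
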
+ +It is recommended that models use the `channels_last` (or `NHWC`) layout of +Tensors throughout, and leave it to TensorFlow's graph optimizer to rewrite to +`channels_first` (or `NCHW`) if needed. diff --git a/site/en/hub/common_saved_model_apis/index.md b/site/en/hub/common_saved_model_apis/index.md new file mode 100644 index 00000000000..356505f9952 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/index.md @@ -0,0 +1,46 @@ + +# Common SavedModel APIs for TF Hub + +## Introduction + +[TensorFlow Hub](https://tfhub.dev) hosts models for a variety of tasks. Models +for the same task are encouraged to implement a common API so that model +consumers can easily exchange them without modifying the code that uses them, +even if they come from different publishers. + +The goal is to make exchanging different models for the same task as simple as +switching a string-valued hyperparameter. With that, model consumers can easily +find the best one for their problem. + +This directory collects specifications of common APIs for models in the +[TF2 SavedModel format](../tf2_saved_model.md). (It replaces the +[Common Signatures](../common_signatures/index.md) for the now-deprecated +[TF1 Hub format](../tf1_hub_module.md).) + +## Reusable SavedModel: the common foundation + +The [Reusable SavedModel API](../reusable_saved_models.md) defines general +conventions how to load a SavedModel back into a Python program and reuse it as +part of a bigger TensorFlow model. + +Basic usage: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +outputs = obj(inputs, training=False) # Invokes the tf.function obj.__call__. +``` + +Key point: This uses the object-based interface to restored SavedModels that was +added in TensorFlow 2, not the SavedModel signatures for serving. + +For Keras users, the `hub.KerasLayer` class relies on this API to wrap the +Reusable SavedModel as a Keras Layer (shielding Keras users from its details), +with inputs and outputs according to the task-specific APIs listed below. + +## Task-specific APIs + +These refine the [Reusable SavedModel API](../reusable_saved_models.md) with +conventions for particular ML tasks and types of data. + +* [Image tasks](images.md) +* [Text tasks](text.md) diff --git a/site/en/hub/common_saved_model_apis/text.md b/site/en/hub/common_saved_model_apis/text.md new file mode 100644 index 00000000000..209319f27a9 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/text.md @@ -0,0 +1,361 @@ + +# Common SavedModel APIs for Text Tasks + +This page describes how [TF2 SavedModels](../tf2_saved_model.md) for +text-related tasks should implement the +[Reusable SavedModel API](../reusable_saved_models.md). (This replaces and +extends the [Common Signatures for Text](../common_signatures/text.md) for the +now-deprecated [TF1 Hub format](../tf1_hub_module).) + +## Overview + +There are several APIs to compute **text embeddings** (also known as dense +representations of text, or text feature vectors). + +* The API for *text embeddings from text inputs* is implemented by a + SavedModel that maps a batch of strings to a batch of embedding vectors. + This is very easy to use, and many models on TF Hub have implemented it. + However, this does not allow fine-tuning the model on TPU. 
+ +* The API for *text embeddings with preprocessed inputs* solves the same task, + but is implemented by two separate SavedModels: + + * a *preprocessor* that can run inside a tf.data input pipeline and + converts strings and other variable-length data into numeric Tensors, + * an *encoder* that accepts the results of the preprocessor and performs + the trainable part of the embedding computation. + + This split allows inputs to be preprocessed asynchronously before being fed + into the training loop. In particular, it allows building encoders that can + be run and fine-tuned on [TPU](https://www.tensorflow.org/guide/tpu). + +* The API for *text embeddings with Transformer encoders* extends the API for + text embeddings from preprocessed inputs to the particular case of BERT and + other Transformer encoders. + + * The *preprocessor* is extended to build encoder inputs from more than + one segment of input text. + * The *Transformer encoder* exposes the context-aware embeddings of + individual tokens. + +In each case, the text inputs are UTF-8 encoded strings, typically of plain +text, unless the model documentation provides otherwise. + +Regardless of API, different models have been pre-trained on text from different +languages and domains, and with different tasks in mind. Therefore, not every +text embedding model is suitable for every problem. + + + + +## Text Embedding from Text Inputs + +A SavedModel for **text embeddings from text inputs** accepts a batch of inputs +in a string Tensor of shape `[batch_size]` and maps them to a float32 Tensor of +shape `[batch_size, dim]` with dense representations (feature vectors) of the +inputs. + +### Usage synopsis + +```python +obj = hub.load("path/to/model") +text_input = ["A long sentence.", + "single-word", + "http://example.com"] +embeddings = obj(text_input) +``` + +Recall from the [Reusable SavedModel API](../reusable_saved_models.md) that +running the model in training mode (e.g., for dropout) may require a keyword +argument `obj(..., training=True)`, and that `obj` provides attributes +`.variables`, `.trainable_variables` and `.regularization_losses` as applicable. + +In Keras, all this is taken care of by + +```python +embeddings = hub.KerasLayer("path/to/model", trainable=...)(text_input) +``` + +### Distributed training + +If the text embedding is used as part of a model that gets trained with a +distribution strategy, the call to `hub.load("path/to/model")` or +`hub.KerasLayer("path/to/model", ...)`, resp., must happen inside the +DistributionStrategy scope in order to create the model's variables in the +distributed way. For example + +```python + with strategy.scope(): + ... + model = hub.load("path/to/model") + ... +``` + +### Examples + +* Colab tutorial + [Text Classification with Movie Reviews](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb). + + + +## Text Embeddings with Preprocessed Inputs + +A **text embedding with preprocessed inputs** is implemented by two separate +SavedModels: + +* a **preprocessor** that maps a string Tensor of shape `[batch_size]` to a + dict of numeric Tensors, +* an **encoder** that accepts a dict of Tensors as returned by the + preprocessor, performs the trainable part of the embedding computation, and + returns a dict of outputs. The output under key `"default"` is a float32 + Tensor of shape `[batch_size, dim]`. 
+ +This allows to run the preprocessor in an input pipeline but fine-tune the +embeddings computed by the encoder as part of a larger model. In particular, it +allows to build encoders that can be run and fine-tuned on +[TPU](https://www.tensorflow.org/guide/tpu). + +It is an implementation detail which Tensors are contained in the preprocessor's +output, and which (if any) additional Tensors besides `"default"` are contained +in the encoder's output. + +The documentation of the encoder must specify which preprocessor to use with it. +Typically, there is exactly one correct choice. + +### Usage synopsis + +```python +text_input = tf.constant(["A long sentence.", + "single-word", + "http://example.com"]) +preprocessor = hub.load("path/to/preprocessor") # Must match `encoder`. +encoder_inputs = preprocessor(text_input) + +encoder = hub.load("path/to/encoder") +encoder_outputs = encoder(encoder_inputs) +embeddings = encoder_outputs["default"] +``` + +Recall from the [Reusable SavedModel API](../reusable_saved_models.md) that +running the encoder in training mode (e.g., for dropout) may require a keyword +argument `encoder(..., training=True)`, and that `encoder` provides attributes +`.variables`, `.trainable_variables` and `.regularization_losses` as applicable. + +The `preprocessor` model may have `.variables` but is not meant to be trained +further. Preprocessing is not mode-dependent: if `preprocessor()` has a +`training=...` argument at all, it has no effect. + +In Keras, all this is taken care of by + +```python +encoder_inputs = hub.KerasLayer("path/to/preprocessor")(text_input) +encoder_outputs = hub.KerasLayer("path/to/encoder", trainable=True)(encoder_inputs) +embeddings = encoder_outputs["default"] +``` + +### Distributed training + +If the encoder is used as part of a model that gets trained with a distribution +strategy, the call to `hub.load("path/to/encoder")` or +`hub.KerasLayer("path/to/encoder", ...)`, resp., must happen inside + +```python + with strategy.scope(): + ... +``` + +in order to re-create the encoder variables in the distributed way. + +Likewise, if the preprocessor is part of the trained model (as in the simple +example above), it also needs to be loaded under the distribution strategy +scope. If, however, the preprocessor is used in an input pipeline (e.g., in a +callable passed to `tf.data.Dataset.map()`), its loading must happen *outside* +the distribution strategy scope, in order to place its variables (if any) on the +host CPU. + +### Examples + +* Colab tutorial + [Classify text with BERT](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/text/classify_text_with_bert.ipynb). + + + +## Text embeddings with Transformer Encoders + +Transformer encoders for text operate on a batch of input sequences, each +sequence comprising *n* ≥ 1 segments of tokenized text, within some +model-specific bound on *n*. For BERT and many of its extensions, that bound is +2, so they accept single segments and segment pairs. + +The API for **text embeddings with Transformer encoders** extends the API for +text embeddings with preprocessed inputs to this setting. + +### Preprocessor + +A preprocessor SavedModel for text embeddings with Transformer encoders +implements the API of a preprocessor SavedModel for text embeddings with +preprocessed inputs (see above), which provides a way to map single-segment text +inputs directly to encoder inputs. 
+ +In addition, the preprocessor SavedModel provides callable subobjects `tokenize` +for tokenization (separately per segment) and `bert_pack_inputs` for packing *n* +tokenized segments into one input sequence for the encoder. Each subobject +follows the [Reusable SavedModel API](../reusable_saved_models.md). + +#### Usage synopsis + +As a concrete example for two segments of text, let us look at a sentence +entailment task that asks whether a premise (first segment) does or does not +imply a hypothesis (second segment). + +```python +preprocessor = hub.load("path/to/preprocessor") + +# Tokenize batches of both text inputs. +text_premises = tf.constant(["The quick brown fox jumped over the lazy dog.", + "Good day."]) +tokenized_premises = preprocessor.tokenize(text_premises) +text_hypotheses = tf.constant(["The dog was lazy.", # Implied. + "Axe handle!"]) # Not implied. +tokenized_hypotheses = preprocessor.tokenize(text_hypotheses) + +# Pack input sequences for the Transformer encoder. +seq_length = 128 +encoder_inputs = preprocessor.bert_pack_inputs( + [tokenized_premises, tokenized_hypotheses], + seq_length=seq_length) # Optional argument. +``` + +In Keras, this computation can be expressed as + +```python +tokenize = hub.KerasLayer(preprocessor.tokenize) +tokenized_hypotheses = tokenize(text_hypotheses) +tokenized_premises = tokenize(text_premises) + +bert_pack_inputs = hub.KerasLayer( + preprocessor.bert_pack_inputs, + arguments=dict(seq_length=seq_length)) # Optional argument. +encoder_inputs = bert_pack_inputs([tokenized_premises, tokenized_hypotheses]) +``` + +#### Details of `tokenize` + +A call to `preprocessor.tokenize()` accepts a string Tensor of shape +`[batch_size]` and returns a +[RaggedTensor](https://www.tensorflow.org/guide/ragged_tensor) of shape +`[batch_size, ...]` whose values are int32 token ids representing the input +strings. There can be *r* ≥ 1 ragged dimensions after `batch_size` but no other +uniform dimension. + +* If *r*=1, the shape is `[batch_size, (tokens)]`, and each input is simply + tokenized into a flat sequence of tokens. +* If *r*>1, there are *r*-1 additional levels of grouping. For example, + [tensorflow_text.BertTokenizer](https://github.com/tensorflow/text/blob/v2.3.0/tensorflow_text/python/ops/bert_tokenizer.py#L138) + uses *r*=2 to group tokens by words and yields shape `[batch_size, (words), + (tokens_per_word)]`. It is up to the model at hand how many of these extra + level(s) exist, if any, and what groupings they represent. + +The user can (but need not) modify tokenized inputs, e.g., to accommodate the +seq_length limit that will be enforced in packing encoder inputs. Extra +dimensions in the tokenizer output can help here (e.g., to respect word +boundaries) but become meaningless in the next step. + +In terms of the [Reusable SavedModel API](../reusable_saved_models.md), the +`preprocessor.tokenize` object may have `.variables` but is not meant to be +trained further. Tokenization is not mode-dependent: if +`preprocessor.tokenize()` has a `training=...` argument at all, it has no +effect. + +#### Details of `bert_pack_inputs` + +A call to `preprocessor.bert_pack_inputs()` accepts a Python list of tokenized +inputs (batched separately for each input segment) and returns a dict of Tensors +representing a batch of fixed-length input sequences for the Transformer encoder +model. 
+ +Each tokenized input is an int32 RaggedTensor of shape `[batch_size, ...]`, +where the number *r* of ragged dimensions after batch_size is either 1 or the +same as in the output of `preprocessor.tokenize().` (The latter is for +convenience only; the extra dimensions are flattened out before packing.) + +Packing adds special tokens around the input segments as expected by the +encoder. The `bert_pack_inputs()` call implements exactly the packing scheme +used by the original BERT models and many of their extensions: the packed +sequence starts with one start-of-sequence token, followed by the tokenized +segments, each terminated by one end-of-segment token. Remaining positions up to +seq_length, if any, are filled up with padding tokens. + +If a packed sequence would exceed seq_length, `bert_pack_inputs()` truncates its +segments to prefixes of approximately equal sizes so that the packed sequence +fits exactly within seq_length. + +Packing is not mode-dependent: if `preprocessor.bert_pack_inputs()` has a +`training=...` argument at all, it has no effect. Also, +`preprocessor.bert_pack_inputs` is not expected to have variables, or support +fine-tuning. + +### Encoder + +The encoder is called on the dict of `encoder_inputs` in the same way as in the +API for text embeddings with preprocessed inputs (see above), including the +provisions from the [Reusable SavedModel API](../reusable_saved_models.md). + +#### Usage synopsis + +```python +encoder = hub.load("path/to/encoder") +encoder_outputs = encoder(encoder_inputs) +``` + +or equivalently in Keras: + +```python +encoder = hub.KerasLayer("path/to/encoder", trainable=True) +encoder_outputs = encoder(encoder_inputs) +``` + +#### Details + +The `encoder_outputs` are a dict of Tensors with the following keys. + + +* `"sequence_output"`: a float32 Tensor of shape `[batch_size, seq_length, + dim]` with the context-aware embedding of each token of every packed input + sequence. +* `"pooled_output"`: a float32 Tensor of shape `[batch_size, dim]` with the + embedding of each input sequence as a whole, derived from sequence_output in + some trainable manner. +* `"default"`, as required by the API for text embeddings with preprocessed + inputs: a float32 Tensor of shape `[batch_size, dim]` with the embedding of + each input sequence. (This might be just an alias of pooled_output.) + +The contents of the `encoder_inputs` are not strictly required by this API +definition. However, for encoders that use BERT-style inputs, it is recommended +to use the following names (from the +[NLP Modeling Toolkit of TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/nlp)) +to minimize friction in interchanging encoders and reusing preprocessor models: + +* `"input_word_ids"`: an int32 Tensor of shape `[batch_size, seq_length]` with + the token ids of the packed input sequence (that is, including a + start-of-sequence token, end-of-segment tokens, and padding). +* `"input_mask"`: an int32 Tensor of shape `[batch_size, seq_length]` with + value 1 at the position of all input tokens present before padding and value + 0 for the padding tokens. +* `"input_type_ids"`: an int32 Tensor of shape `[batch_size, seq_length]` with + the index of the input segment that gave rise to the input token at the + respective position. The first input segment (index 0) includes the + start-of-sequence token and its end-of-segment token. The second and later + segments (if present) include their respective end-of-segment token. 
Padding + tokens get index 0 again. + +### Distributed training + +For loading the preprocessor and encoder objects inside or outside a +distribution strategy scope, the same rules apply as in the API for text +embeddings with preprocessed inputs (see above). + +### Examples + +* Colab tutorial + [Solve GLUE tasks using BERT on TPU](https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/bert_glue.ipynb). diff --git a/site/en/hub/common_signatures/images.md b/site/en/hub/common_signatures/images.md new file mode 100644 index 00000000000..5e41c3e2960 --- /dev/null +++ b/site/en/hub/common_signatures/images.md @@ -0,0 +1,155 @@ + +# Common Signatures for Images + +This page describes common signatures that should be implemented by modules in +the [TF1 Hub format](../tf1_hub_module.md) for image-related tasks. (For the +[TF2 SavedModel format](../tf2_saved_model.md), see the analogous +[SavedModel API](../common_saved_model_apis/images.md).) + +Some modules can be used for more than one task (e.g., image classification +modules tend to do some feature extraction on the way). Therefore, each module +provides (1) named signatures for all the tasks anticipated by the publisher, +and (2) a default signature `output = m(images)` for its designated primary +task. + + +## Image Feature Vector + +### Usage summary + +An **image feature vector** is a dense 1-D tensor that represents a whole image, +typically for classification by the consumer model. (Unlike the intermediate +activations of CNNs, it does not offer a spatial breakdown. Unlike [image +classification](#classification), it discards the classification learned +by the publisher model.) + +A module for image feature extraction has a default signature that maps a batch +of images to a batch of feature vectors. It can be used like so: + +```python + module_spec = hub.load_module_spec("path/to/module") + height, width = hub.get_expected_image_size(module_spec) + images = ... # A batch of images with shape [batch_size, height, width, 3]. + module = hub.Module(module_spec) + features = module(images) # A batch with shape [batch_size, num_features]. +``` + +It also defines the corresponding named signature. + +### Signature specification + +The named signature for extracting image feature vectors is invoked as + +```python + outputs = module(dict(images=images), signature="image_feature_vector", + as_dict=True) + features = outputs["default"] +``` + +The input follows the general convention for +[input of images](#input). + +The outputs dictionary contains a `"default"` output of dtype `float32` and +shape `[batch_size, num_features]`. The `batch_size` is the same as in the +input, but not known at graph construction time. `num_features` is a known, +module-specific constant independent of input size. + +These feature vectors are meant to be usable for classification with a simple +feed-forward classifier (like the pooled features from the topmost convolutional +layer in a typical CNN for image classification). + +Applying dropout to the output features (or not) should be left to the module +consumer. The module itself should not perform dropout on the actual outputs +(even if it uses dropout internally in other places). + +The outputs dictionary may provide further outputs, for example, the activations +of hidden layers inside the module. Their keys and values are module-dependent. 
+It is recommended to prefix architecture-dependent keys with an architecture +name (e.g., to avoid confusing the intermediate layer `"InceptionV3/Mixed_5c"` +with the topmost convolutional layer `"InceptionV2/Mixed_5c"`). + + +## Image Classification + +### Usage summary + +**Image classification** maps the pixels of an image to linear scores (logits) +for membership in the classes of a taxonomy _selected by the module publisher_. +This allows consumers to draw conclusions from the particular classification +learned by the publisher module, and not just its underlying features (cf. +[Image Feature Vector](#feature-vector)). + +A module for image feature extraction has a default signature that maps a batch +of images to a batch of logits. It can be used like so: + +```python + module_spec = hub.load_module_spec("path/to/module") + height, width = hub.get_expected_image_size(module_spec) + images = ... # A batch of images with shape [batch_size, height, width, 3]. + module = hub.Module(module_spec) + logits = module(images) # A batch with shape [batch_size, num_classes]. +``` + +It also defines the corresponding named signature. + +### Signature specification + +The named signature for extracting image feature vectors is invoked as + +```python + outputs = module(dict(images=images), signature="image_classification", + as_dict=True) + logits = outputs["default"] +``` + +The input follows the general convention for +[input of images](#input). + +The outputs dictionary contains a `"default"` output of dtype `float32` and +shape `[batch_size, num_classes]`. The `batch_size` is the same as in the input, +but not known at graph construction time. `num_classes` is the number of classes +in the classification, which is a known constant independent of input size. + +Evaluating `outputs["default"][i, c]` yields a score predicting the membership +of example `i` in the class with index `c`. + +It depends on the underlying classification whether these scores are meant to be +used with softmax (for mutually exclusive classes), sigmoid (for orthogonal +classes), or something else. The module documentation should describe this, +and refer to a definition of the class indices. + +The outputs dictionary may provide further outputs, for example, the activations +of hidden layers inside the module. Their keys and values are module-dependent. +It is recommended to prefix architecture-dependent keys with an architecture +name (e.g., to avoid confusing the intermediate layer `"InceptionV3/Mixed_5c"` +with the topmost convolutional layer `"InceptionV2/Mixed_5c"`). + + +## Image input + +This is common to all types of image modules and image signatures. + +A signature that takes a batch of images as input accepts them as a dense 4-D +tensor of dtype `float32` and shape `[batch_size, height, width, 3]` whose +elements are RGB color values of pixels normalized to the range [0, 1]. This is +what you get from `tf.image.decode_*()` followed by +`tf.image.convert_image_dtype(..., tf.float32)`. + +A module with exactly one (or one principal) input of images uses the name +`"images"` for this input. + +The module accepts any `batch_size`, and correspondingly sets the first +dimension of TensorInfo.tensor_shape to "unknown". The last dimension is fixed +to the number `3` of RGB channels. The `height` and `width` dimensions are +fixed to the expected size of input images. (Future work may remove that +restriction for fully convolutional modules.) 
+ +Consumers of the module should not inspect the shape directly, but obtain +the size information by calling hub.get_expected_image_size() +on the module or module spec, and are expected to resize input images +accordingly (typically before/during batching). + +For simplicity, TF-Hub modules use the `channels_last` +(or `NHWC`) layout of Tensors, and leave it to TensorFlow's graph optimizer +to rewrite to `channels_first` (or `NCHW`) if needed. It has been doing that +by default since TensorFlow version 1.7. diff --git a/site/en/hub/common_signatures/index.md b/site/en/hub/common_signatures/index.md new file mode 100644 index 00000000000..05eacc8b37f --- /dev/null +++ b/site/en/hub/common_signatures/index.md @@ -0,0 +1,25 @@ + +# Common Signatures for TF Hub Modules + +## Introduction + +[TensorFlow Hub](https://tfhub.dev) hosts models for a variety of tasks. Models +for the same task are encouraged to implement a common API so that model +consumers can easily exchange them without modifying the code that uses them, +even if they come from different publishers. + +The goal is to make exchanging different models for the same task as simple as +switching a string-valued hyperparameter. With that, model consumers can easily +find the best one for their problem. + +This directory collects specifications of common signatures for modules in the +[TF1 Hub format](../tf1_hub_module.md). + +Note that the TF1 Hub format has been **deprecated** in favor of the +[TF2 SavedModel format](../tf2_saved_model.md) and its +[Common SavedModel APIs](../common_saved_model_apis/index.md). + +## Signatures + +* [Image Signatures](images.md) +* [Text Signatures](text.md) diff --git a/site/en/hub/common_signatures/text.md b/site/en/hub/common_signatures/text.md new file mode 100644 index 00000000000..3ea8f27c91d --- /dev/null +++ b/site/en/hub/common_signatures/text.md @@ -0,0 +1,46 @@ + +# Common Signatures for Text + +This page describes common signatures that should be implemented by modules in +the [TF1 Hub format](../tf1_hub_module.md) for tasks that accept text inputs. +(For the [TF2 SavedModel format](../tf2_saved_model.md), see the analogous +[SavedModel API](../common_saved_model_apis/text.md).) + +## Text feature vector + +A **text feature vector** module creates a dense vector representation +from text features. +It accepts a batch of strings of shape `[batch_size]` and maps them to +a `float32` tensor of shape `[batch_size, N]`. This is often called +**text embedding** in dimension `N`. + +### Basic usage + +```python + embed = hub.Module("path/to/module") + representations = embed([ + "A long sentence.", + "single-word", + "http://example.com"]) +``` + +### Feature column usage + +```python + feature_columns = [ + hub.text_embedding_column("comment", "path/to/module", trainable=False), + ] + input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True) + estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns) + estimator.train(input_fn, max_steps=100) +``` + +## Notes + +Modules have been pre-trained on different domains and/or tasks, +and therefore not every text feature vector module would be suitable for +your problem. E.g.: some modules could have been trained on a single language. + +This interface does not allow fine-tuning of the text representation on TPUs, +because it requires the module to instantiate both string processing and the +trainable variables at the same time. 
diff --git a/site/en/hub/community.md b/site/en/hub/community.md new file mode 100644 index 00000000000..a7a4c2bf0ec --- /dev/null +++ b/site/en/hub/community.md @@ -0,0 +1,6 @@ + +# Community and support + +* The source code is available on [GitHub](https://github.com/tensorflow/hub). + We use [GitHub issues](https://github.com/tensorflow/hub/issues) for + tracking feature requests and bugs. \ No newline at end of file diff --git a/site/en/hub/contribute.md b/site/en/hub/contribute.md new file mode 100644 index 00000000000..e537f79f766 --- /dev/null +++ b/site/en/hub/contribute.md @@ -0,0 +1,16 @@ + +# Contribute + +To learn more about how to publish a model or model collection on +[tfhub.dev](https://tfhub.dev/), see the [becoming_a_publisher](publish.md) +guide. + +You can find more information of how to contribute to the +[TensorFlow Hub library](https://github.com/tensorflow/hub) in our +[GitHub contributing guide](https://github.com/tensorflow/hub/blob/master/CONTRIBUTING.md). + +Content published to tfhub.dev can be automatically mirrored to other model +hubs, provided it follows a specified format and is permitted by our Terms +(https://tfhub.dev/terms). See [our publishing documentation](publish.md) for +more details, and [our contribution documentation](contribute_a_model.md) if +you'd like to opt-out of mirroring. diff --git a/site/en/hub/hosting.md b/site/en/hub/hosting.md new file mode 100644 index 00000000000..ce2ce76b0a6 --- /dev/null +++ b/site/en/hub/hosting.md @@ -0,0 +1,175 @@ + +# Model hosting protocol + +This document describes the URL conventions used when hosting all model types on +[tfhub.dev](https://tfhub.dev) - TFJS, TF Lite and TensorFlow models. It also +describes the HTTP(S)-based protocol implemented by the `tensorflow_hub` library +in order to load TensorFlow models from [tfhub.dev](https://tfhub.dev) and +compatible services into TensorFlow programs. + +Its key feature is to use the same URL in code to load a model and in a browser +to view the model documentation. + +## General URL conventions + +[tfhub.dev](https://tfhub.dev) supports the following URL formats: + +* TF Hub publishers follow `https://tfhub.dev/` +* TF Hub collections follow + `https://tfhub.dev//collection/` +* TF Hub models have versioned url + `https://tfhub.dev///` and unversioned url + `https://tfhub.dev//` that resolves to the latest + version of the model. + +TF Hub models can be downloaded as compressed assets by appending URL parameters +to the [tfhub.dev](https://tfhub.dev) model URL. However, the URL parameters +required to achieve that depend on the model type: + +* TensorFlow models (both SavedModel and TF1 Hub formats): append + `?tf-hub-format=compressed` to the TensorFlow model url. +* TFJS models: append `?tfjs-format=compressed` to the TFJS model url to + download the compressed or `/model.json?tfjs-format=file` to read if from + remote storage. +* TF lite models: append `?lite-format=tflite` to the TF Lite model url. + +For example: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Type | Model URL | Download type | URL param | Download URL |
| --- | --- | --- | --- | --- |
| TensorFlow (SavedModel, TF1 Hub format) | https://tfhub.dev/google/spice/2 | .tar.gz | ?tf-hub-format=compressed | https://tfhub.dev/google/spice/2?tf-hub-format=compressed |
| TF Lite | https://tfhub.dev/google/lite-model/spice/1 | .tflite | ?lite-format=tflite | https://tfhub.dev/google/lite-model/spice/1?lite-format=tflite |
| TF.js | https://tfhub.dev/google/tfjs-model/spice/2/default/1 | .tar.gz | ?tfjs-format=compressed | https://tfhub.dev/google/tfjs-model/spice/2/default/1?tfjs-format=compressed |
+ +Additionally, some models also are hosted in a format that can be read directly +from remote storage without being downloaded. This is especially useful if there +is no local storage available, such as running a TF.js model in the browser or +loading a SavedModel on [Colab](https://colab.research.google.com/). Be +conscious that reading models that are hosted remotely without being downloaded +locally may increase latency. + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

| Type | Model URL | Response type | URL param | Request URL |
| --- | --- | --- | --- | --- |
| TensorFlow (SavedModel, TF1 Hub format) | https://tfhub.dev/google/spice/2 | String (Path to GCS folder where the uncompressed model is stored) | ?tf-hub-format=uncompressed | https://tfhub.dev/google/spice/2?tf-hub-format=uncompressed |
| TF.js | https://tfhub.dev/google/tfjs-model/spice/2/default/1 | .json | ?tfjs-format=file | https://tfhub.dev/google/tfjs-model/spice/2/default/1/model.json?tfjs-format=file |
+ +## tensorflow_hub library protocol + +This section describes how we host models on [tfhub.dev](https://tfhub.dev) for +use with the tensorflow_hub library. If you want to host your own model +repository to work with the tensorflow_hub library, your HTTP(s) distribution +service should provide an implementation of this protocol. + +Note that this section does not address hosting TF Lite and TFJS models since +they are not downloaded via the `tensorflow_hub` library. For more information +on hosting these model types, please check [above](#general-url-conventions). + +### Compressed Hosting + +Models are stored on [tfhub.dev](https://tfhub.dev) as compressed tar.gz files. +By default, the tensorflow_hub library automatically downloads the compressed +model. They can also be manually downloaded by appending the +`?tf-hub-format=compressed` to the model url, for example: + +```shell +wget https://tfhub.dev/tensorflow/albert_en_xxlarge/1?tf-hub-format=compressed +``` + +The root of the archive is the root of the model directory and should contain a +SavedModel, as in this example: + +```shell +# Create a compressed model from a SavedModel directory. +$ tar -cz -f model.tar.gz --owner=0 --group=0 -C /tmp/export-model/ . + +# Inspect files inside a compressed model +$ tar -tf model.tar.gz +./ +./variables/ +./variables/variables.data-00000-of-00001 +./variables/variables.index +./assets/ +./saved_model.pb +``` + +Tarballs for use with the legacy +[TF1 Hub format](https://www.tensorflow.org/hub/tf1_hub_module) will also +contain a `./tfhub_module.pb` file. + +When one of `tensorflow_hub` library model loading APIs is invoked +([hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer), +[hub.load](https://www.tensorflow.org/hub/api_docs/python/hub/load), etc) the +library downloads the model, uncompresses the model and caches it locally. The +`tensorflow_hub` library expects that model URLs are versioned and that the +model content of a given version is immutable, so that it can be cached +indefinitely. Learn more about [caching models](caching.md). + +![](https://raw.githubusercontent.com/tensorflow/hub/master/docs/images/library_download_cache.png) + +### Uncompressed Hosting + +When the environment variable `TFHUB_MODEL_LOAD_FORMAT` or the command-line flag +`--tfhub_model_load_format` is set to `UNCOMPRESSED`, the model is read directly +from remote storage (GCS) instead of being downloaded and uncompressed locally. +When this behavior is enabled the library appends `?tf-hub-format=uncompressed` +to the model URL. That request returns the path to the folder on GCS that +contains the uncompressed model files. As an example, \ +`https://tfhub.dev/google/spice/2?tf-hub-format=uncompressed` \ +returns \ +`gs://kaggle-tfhub-models-uncompressed/tfhub-modules/google/spice/2/uncompressed` +in the body of the 303 response. The library then reads the model from that GCS +destination. 
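
For example, a hedged sketch of enabling uncompressed loading from Python by setting the environment variable before the model handle is resolved (the handle is the SPICE example used above; setting the variable inside the process is an assumption, it can equally be exported in the shell):

```python
import os
# Assumption: the variable must be set before tensorflow_hub resolves the handle.
os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED"

import tensorflow_hub as hub

# Reads the model directly from GCS instead of downloading and uncompressing it.
m = hub.load("https://tfhub.dev/google/spice/2")
```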
diff --git a/site/en/hub/images/action_recognition.gif b/site/en/hub/images/action_recognition.gif new file mode 100644 index 00000000000..a58c22ac8c3 Binary files /dev/null and b/site/en/hub/images/action_recognition.gif differ diff --git a/site/en/hub/images/bert.png b/site/en/hub/images/bert.png new file mode 100644 index 00000000000..e36f69c9c7b Binary files /dev/null and b/site/en/hub/images/bert.png differ diff --git a/site/en/hub/images/bert_preprocess.png b/site/en/hub/images/bert_preprocess.png new file mode 100644 index 00000000000..18b3b435d1b Binary files /dev/null and b/site/en/hub/images/bert_preprocess.png differ diff --git a/site/en/hub/images/bert_preprocess_wide.png b/site/en/hub/images/bert_preprocess_wide.png new file mode 100644 index 00000000000..b414196724e Binary files /dev/null and b/site/en/hub/images/bert_preprocess_wide.png differ diff --git a/site/en/hub/images/bit_blog.jpg b/site/en/hub/images/bit_blog.jpg new file mode 100644 index 00000000000..260415bf0b1 Binary files /dev/null and b/site/en/hub/images/bit_blog.jpg differ diff --git a/site/en/hub/images/boundless.png b/site/en/hub/images/boundless.png new file mode 100644 index 00000000000..ccc52d17f84 Binary files /dev/null and b/site/en/hub/images/boundless.png differ diff --git a/site/en/hub/images/colab_logo.svg b/site/en/hub/images/colab_logo.svg new file mode 100644 index 00000000000..d03f1106221 --- /dev/null +++ b/site/en/hub/images/colab_logo.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/food.png b/site/en/hub/images/food.png new file mode 100644 index 00000000000..41865be3984 Binary files /dev/null and b/site/en/hub/images/food.png differ diff --git a/site/en/hub/images/gan_faces.gif b/site/en/hub/images/gan_faces.gif new file mode 100644 index 00000000000..a34b8d517f4 Binary files /dev/null and b/site/en/hub/images/gan_faces.gif differ diff --git a/site/en/hub/images/github_icon.svg b/site/en/hub/images/github_icon.svg new file mode 100644 index 00000000000..0a607bb98b3 --- /dev/null +++ b/site/en/hub/images/github_icon.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/guide_basics.png b/site/en/hub/images/guide_basics.png new file mode 100644 index 00000000000..e6aee34f516 Binary files /dev/null and b/site/en/hub/images/guide_basics.png differ diff --git a/site/en/hub/images/image_classification.png b/site/en/hub/images/image_classification.png new file mode 100644 index 00000000000..a3840e3482c Binary files /dev/null and b/site/en/hub/images/image_classification.png differ diff --git a/site/en/hub/images/interpolation.png b/site/en/hub/images/interpolation.png new file mode 100644 index 00000000000..d2f062da7c1 Binary files /dev/null and b/site/en/hub/images/interpolation.png differ diff --git a/site/en/hub/images/library_download_cache.png b/site/en/hub/images/library_download_cache.png new file mode 100644 index 00000000000..1b581a4a819 Binary files /dev/null and b/site/en/hub/images/library_download_cache.png differ diff --git a/site/en/hub/images/object_detection.png b/site/en/hub/images/object_detection.png new file mode 100644 index 00000000000..57b327099ae Binary files /dev/null and b/site/en/hub/images/object_detection.png differ diff --git a/site/en/hub/images/odml.png b/site/en/hub/images/odml.png new file mode 100644 index 00000000000..29bf3bcc61b Binary files /dev/null and b/site/en/hub/images/odml.png differ diff --git a/site/en/hub/images/similarity.png b/site/en/hub/images/similarity.png new file mode 100644 index 00000000000..3155e8706e1 Binary files /dev/null and 
b/site/en/hub/images/similarity.png differ diff --git a/site/en/hub/images/spice_blog.png b/site/en/hub/images/spice_blog.png new file mode 100644 index 00000000000..cf19769e6d8 Binary files /dev/null and b/site/en/hub/images/spice_blog.png differ diff --git a/site/en/hub/images/spice_color.png b/site/en/hub/images/spice_color.png new file mode 100644 index 00000000000..35b68d7c444 Binary files /dev/null and b/site/en/hub/images/spice_color.png differ diff --git a/site/en/hub/images/stackoverflow_icon.svg b/site/en/hub/images/stackoverflow_icon.svg new file mode 100644 index 00000000000..491a75e464d --- /dev/null +++ b/site/en/hub/images/stackoverflow_icon.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/style_transfer.png b/site/en/hub/images/style_transfer.png new file mode 100644 index 00000000000..d0427408830 Binary files /dev/null and b/site/en/hub/images/style_transfer.png differ diff --git a/site/en/hub/images/super_resolution.png b/site/en/hub/images/super_resolution.png new file mode 100644 index 00000000000..7d3f3741077 Binary files /dev/null and b/site/en/hub/images/super_resolution.png differ diff --git a/site/en/hub/images/text_video.gif b/site/en/hub/images/text_video.gif new file mode 100644 index 00000000000..5fe639b1eea Binary files /dev/null and b/site/en/hub/images/text_video.gif differ diff --git a/site/en/hub/images/yamnet.png b/site/en/hub/images/yamnet.png new file mode 100644 index 00000000000..416956ad6fb Binary files /dev/null and b/site/en/hub/images/yamnet.png differ diff --git a/site/en/hub/installation.md b/site/en/hub/installation.md new file mode 100644 index 00000000000..2381fbea614 --- /dev/null +++ b/site/en/hub/installation.md @@ -0,0 +1,57 @@ + +# Installation + +## Installing tensorflow_hub + +The `tensorflow_hub` library can be installed alongside TensorFlow 1 and +TensorFlow 2. We recommend that new users start with TensorFlow 2 right away, +and current users upgrade to it. + +### Use with TensorFlow 2 + +Use [pip](https://pip.pypa.io/) to +[install TensorFlow 2](https://www.tensorflow.org/install) as usual. (See there +for extra instructions about GPU support.) Then install a current version of +[`tensorflow-hub`](https://pypi.org/project/tensorflow-hub/) next to it (must be +0.5.0 or newer). + +```bash +$ pip install "tensorflow>=2.0.0" +$ pip install --upgrade tensorflow-hub +``` + +The TF1-style API of TensorFlow Hub works with the v1 compatibility mode of +TensorFlow 2. + +### Legacy use with TensorFlow 1 + +TensorFlow 1.15 is the only version of TensorFlow 1.x still supported by the +`tensorflow_hub` library (as of release 0.11.0). TensorFlow 1.15 defaults to +TF1-compatible behavior but contains many TF2 features under the hood to allow +some use of TensorFlow Hub's TF2-style APIs. + +```bash +$ pip install "tensorflow>=1.15,<2.0" +$ pip install --upgrade tensorflow-hub +``` + +### Use of pre-release versions + +The pip packages `tf-nightly` and `tf-hub-nightly` are built automatically from +the source code on github, with no release testing. This lets developers try out +the latest code without [building from source](build_from_source.md). 
+ +```bash +$ pip install tf-nightly +$ pip install --upgrade tf-hub-nightly +``` + +## Next Steps + +- [Library overview](lib_overview.md) +- Tutorials: + - [Text classification](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) + - [Image classification](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) + - Additional examples + [on GitHub](https://github.com/tensorflow/hub/blob/master/examples/README.md) +- Find models on [tfhub.dev](https://tfhub.dev). \ No newline at end of file diff --git a/site/en/hub/lib_overview.md b/site/en/hub/lib_overview.md new file mode 100644 index 00000000000..c480ad2fbdf --- /dev/null +++ b/site/en/hub/lib_overview.md @@ -0,0 +1,50 @@ + +# TensorFlow Hub Library Overview + +The [`tensorflow_hub`](https://github.com/tensorflow/hub) library lets you +download and reuse trained models in your TensorFlow program with a minimum +amount of code. The main way to load a trained model is using the +`hub.KerasLayer` API. + +```python +import tensorflow_hub as hub + +embed = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2") +embeddings = embed(["A long sentence.", "single-word", "http://example.com"]) +print(embeddings.shape, embeddings.dtype) +``` +**Note:** This documentation uses TFhub.dev URL handles in examples. See more +information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +## Setting the cache location for downloads. + +By default, `tensorflow_hub` uses a system-wide, temporary directory to cache +downloaded and uncompressed models. See [Caching](caching.md) for options to use +other, possibly more persistent locations. + +## API stability + +Although we hope to prevent breaking changes, this project is still under active +development and is not yet guaranteed to have a stable API or model format. + +## Fairness + +As in all of machine learning, [fairness](http://ml-fairness.com) is an +[important](https://research.googleblog.com/2016/10/equality-of-opportunity-in-machine.html) +consideration. Many pre-trained models are trained on large datasets. When +reusing any model, it’s important to be mindful of what data the model was +trained on (and whether there are any existing biases there), and how these +might impact your use of it. + +## Security + +Since they contain arbitrary TensorFlow graphs, models can be thought of as +programs. +[Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) +describes the security implications of referencing a model from an untrusted +source. + +## Next Steps + +- [Use the library](tf2_saved_model.md) +- [Reusable SavedModels](reusable_saved_models.md) diff --git a/site/en/hub/migration_tf2.md b/site/en/hub/migration_tf2.md new file mode 100644 index 00000000000..c2cc4b50759 --- /dev/null +++ b/site/en/hub/migration_tf2.md @@ -0,0 +1,114 @@ + +# Migrating from TF1 to TF2 with TensorFlow Hub + +This page explains how to keep using TensorFlow Hub while migrating your +TensorFlow code from TensorFlow 1 to TensorFlow 2. It complements TensorFlow's +general [migration guide](https://www.tensorflow.org/guide/migrate). + +For TF2, TF Hub has switched away from the legacy `hub.Module` API for building +a `tf.compat.v1.Graph` like `tf.contrib.v1.layers` do. 
Instead, there is now a +`hub.KerasLayer` for use alongside other Keras layers for building a +`tf.keras.Model` (typically in TF2's new +[eager execution environment](https://www.tensorflow.org/api_docs/python/tf/executing_eagerly)) +and its underlying `hub.load()` method for low-level TensorFlow code. + +The `hub.Module` API remains available in the `tensorflow_hub` library for use +in TF1 and in the TF1 compatibility mode of TF2. It can only load models in the +[TF1 Hub format](tf1_hub_module.md). + +The new API of `hub.load()` and `hub.KerasLayer` works for TensorFlow 1.15 (in +eager and graph mode) and in TensorFlow 2. This new API can load the new +[TF2 SavedModel](tf2_saved_model.md) assets, and, with the restrictions laid out +in the [model compatibility guide](model_compatibility.md), the legacy models in +TF1 Hub format. + +In general, it is recommended to use new API wherever possible. + +## Summary of the new API + +`hub.load()` is the new low-level function to load a SavedModel from TensorFlow +Hub (or compatible services). It wraps TF2's `tf.saved_model.load()`; +TensorFlow's [SavedModel Guide](https://www.tensorflow.org/guide/saved_model) +describes what you can do with the result. + +```python +m = hub.load(handle) +outputs = m(inputs) +``` + +The `hub.KerasLayer` class calls `hub.load()` and adapts the result for use in +Keras alongside other Keras layers. (It may even be a convenient wrapper for +loaded SavedModels used in other ways.) + +```python +model = tf.keras.Sequential([ + hub.KerasLayer(handle), + ...]) +``` + +Many tutorials show these APIs in action. Here are some examples: + +* [Text classification example notebook](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) +* [Image classification example notebook](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) + +### Using the new API in Estimator training + +If you use a TF2 SavedModel in an Estimator for training with parameter servers +(or otherwise in a TF1 Session with variables placed on remote devices), you +need to set `experimental.share_cluster_devices_in_session` in the tf.Session's +ConfigProto, or else you will get an error like "Assigned device +'/job:ps/replica:0/task:0/device:CPU:0' does not match any device." + +The necessary option can be set like + +```python +session_config = tf.compat.v1.ConfigProto() +session_config.experimental.share_cluster_devices_in_session = True +run_config = tf.estimator.RunConfig(..., session_config=session_config) +estimator = tf.estimator.Estimator(..., config=run_config) +``` + +Starting with TF2.2, this option is no longer experimental, and the +`.experimental` piece can be dropped. + +## Loading legacy models in TF1 Hub format + +It can happen that a new TF2 SavedModel is not yet available for your use-case +and you need to load an legacy model in TF1 Hub format. Starting in +`tensorflow_hub` release 0.7, you can use legacy model in TF1 Hub format +together with `hub.KerasLayer` as shown below: + +```python +m = hub.KerasLayer(handle) +tensor_out = m(tensor_in) +``` + +Additionally `KerasLayer` exposes the ability to specify `tags`, `signature`, +`output_key` and `signature_outputs_as_dict` for more specific usages of legacy +models in TF1 Hub format and legacy SavedModels. + +For more information on TF1 Hub format compatibility see the +[model compatibility guide](model_compatibility.md). 
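
A hedged sketch of the extra `hub.KerasLayer` arguments mentioned above (the handle, tensors, and key names are placeholders, not a specific model):

```python
# Sketch only: selecting a signature and a single output when wrapping a legacy
# TF1 Hub format model; `handle` is a placeholder. For dict-valued outputs,
# signature_outputs_as_dict=True could be used instead of output_key.
m = hub.KerasLayer(handle,
                   signature="default",    # which signature to apply
                   output_key="default")   # which output of that signature to return
tensor_out = m(tensor_in)
```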
+ +## Using lower level APIs + +Legacy TF1 Hub format models can be loaded via `tf.saved_model.load`. Instead of + +```python +# DEPRECATED: TensorFlow 1 +m = hub.Module(handle, tags={"foo", "bar"}) +tensors_out_dict = m(dict(x1=..., x2=...), signature="sig", as_dict=True) +``` + +it is recommended to use: + +```python +# TensorFlow 2 +m = hub.load(path, tags={"foo", "bar"}) +tensors_out_dict = m.signatures["sig"](x1=..., x2=...) +``` + +In these examples `m.signatures` is a dict of TensorFlow +[concrete functions](https://www.tensorflow.org/tutorials/customization/performance#tracing) +keyed by signature names. Calling such a function computes all its outputs, even +if unused. (This is different from the lazy evaluation of TF1's graph mode.) diff --git a/site/en/hub/model_compatibility.md b/site/en/hub/model_compatibility.md new file mode 100644 index 00000000000..e37ed717c3b --- /dev/null +++ b/site/en/hub/model_compatibility.md @@ -0,0 +1,144 @@ + +# Model compatibility for TF1/TF2 + +## TF Hub model formats + +TF Hub offers reusable model pieces that can be loaded back, built upon, and +possibly be retrained in a TensorFlow program. These come in two different +formats: + +* The custom [TF1 Hub format](https://www.tensorflow.org/hub/tf1_hub_module) . + Its main intended use is in TF1 (or TF1 compatibility mode in TF2) via its + [hub.Module API](https://www.tensorflow.org/hub/api_docs/python/hub/Module). + Full compatibility details [below](#compatibility_of_hubmodule). +* The native [TF2 SavedModel](https://www.tensorflow.org/hub/tf2_saved_model) + format. Its main intended use is in TF2 via the + [hub.load](https://www.tensorflow.org/hub/api_docs/python/hub/load) and + [hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) + APIs. Full compatibility details [below](#compatibility_of_tf2_savedmodel). + +The model format can be found on the model page on +[tfhub.dev](https://tfhub.dev). Model **loading/inference**, **fine-tuning** or +**creation** might not be supported in TF1/2 based on the model formats. + +## Compatibility of the TF1 Hub format {:#compatibility_of_hubmodule} + + + + + + + + + + + + + + + + + + + + + + + + + +

| Operation | TF1 / TF1 compat mode in TF2 [1] | TF2 |
| --- | --- | --- |
| Loading / Inference | Fully supported (complete TF1 Hub format loading guide)<br>`m = hub.Module(handle)`<br>`outputs = m(inputs)` | It's recommended to use either `hub.load`<br>`m = hub.load(handle)`<br>`outputs = m.signatures["sig"](inputs)`<br>or `hub.KerasLayer`<br>`m = hub.KerasLayer(handle, signature="sig")`<br>`outputs = m(inputs)` |
| Fine-tuning | Fully supported (complete TF1 Hub format fine-tuning guide)<br>`m = hub.Module(handle, trainable=True, tags=["train"]*is_training)`<br>`outputs = m(inputs)`<br>Note: modules that don't need a separate train graph don't have a train tag. | Not supported |
| Creation | Fully supported (see complete TF1 Hub format creation guide)<br>Note: The TF1 Hub format is geared towards TF1 and is only partially supported in TF2. Consider creating a TF2 SavedModel. | Not supported |
+ +## Compatibility of TF2 SavedModel {:#compatibility_of_tf2_savedmodel} + +Not supported before TF1.15. + + + + + + + + + + + + + + + + + + + + + + + + +

| Operation | TF1.15 / TF1 compat mode in TF2 [1] | TF2 |
| --- | --- | --- |
| Loading / Inference | Use either `hub.load`<br>`m = hub.load(handle)`<br>`outputs = m(inputs)`<br>or `hub.KerasLayer`<br>`m = hub.KerasLayer(handle)`<br>`outputs = m(inputs)` | Fully supported (complete TF2 SavedModel loading guide). Use either `hub.load`<br>`m = hub.load(handle)`<br>`outputs = m(inputs)`<br>or `hub.KerasLayer`<br>`m = hub.KerasLayer(handle)`<br>`outputs = m(inputs)` |
| Fine-tuning | Supported for a `hub.KerasLayer` used in `tf.keras.Model` when trained with `Model.fit()` or trained in an Estimator whose `model_fn` wraps the Model per the custom `model_fn` guide.<br>Note: `hub.KerasLayer` does not fill in graph collections like the old `tf.compat.v1.layers` or `hub.Module` APIs did. | Fully supported (complete TF2 SavedModel fine-tuning guide). Use either `hub.load`:<br>`m = hub.load(handle)`<br>`outputs = m(inputs, training=is_training)`<br>or `hub.KerasLayer`:<br>`m = hub.KerasLayer(handle, trainable=True)`<br>`outputs = m(inputs)` |
| Creation | The TF2 API `tf.saved_model.save()` can be called from within compat mode. | Fully supported (see complete TF2 SavedModel creation guide) |

[1] "TF1 compat mode in TF2" refers to the combined effect of importing TF2 with `import tensorflow.compat.v1 as tf` and running `tf.disable_v2_behavior()` as described in the TensorFlow Migration guide.
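
For reference, a minimal sketch of the setup that footnote [1] describes:

```python
# "TF1 compat mode in TF2", as described in footnote [1] above.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
```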

diff --git a/site/en/hub/model_formats.md b/site/en/hub/model_formats.md new file mode 100644 index 00000000000..73ae7c247a1 --- /dev/null +++ b/site/en/hub/model_formats.md @@ -0,0 +1,79 @@ + +# Model formats + +[tfhub.dev](https://tfhub.dev) hosts the following model +formats: TF2 SavedModel, TF1 Hub format, TF.js and TFLite. This page provides an +overview of each model format. + +Content published to tfhub.dev can be automatically mirrored to other model +hubs, provided it follows a specified format and is permitted by our Terms +(https://tfhub.dev/terms). See [our publishing documentation](publish.md) for +more details, and [our contribution documentation](contribute_a_model.md) if +you'd like to opt-out of mirroring. + +## TensorFlow formats + +[tfhub.dev](https://tfhub.dev) hosts TensorFlow models in the TF2 SavedModel +format and TF1 Hub format. We recommend using models in the standardized TF2 +SavedModel format instead of the deprecated TF1 Hub format when possible. + +### SavedModel + +TF2 SavedModel is the recommended format for sharing TensorFlow models. You can +learn more about the SavedModel format in the +[TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) guide. + +You can browse SavedModels on tfhub.dev by using the TF2 version filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following +[this link](https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf2). + +You can use SavedModels from tfhub.dev without depending on the `tensorflow_hub` +library, since this format is a part of core TensorFlow. + +Learn more about SavedModels on TF Hub: + +* [Using TF2 SavedModels](tf2_saved_model.md) +* [Exporting a TF2 SavedModel](exporting_tf2_saved_model.md) +* [TF1/TF2 compatibility of TF2 SavedModels](model_compatibility.md) + +### TF1 Hub format + +The TF1 Hub format is a custom serialization format used in by TF Hub library. +The TF1 Hub format is similar to the SavedModel format of TensorFlow 1 on a +syntactic level (same file names and protocol messages) but semantically +different to allow for module reuse, composition and re-training (e.g., +different storage of resource initializers, different tagging conventions for +metagraphs). The easiest way to tell them apart on disk is the presence or +absence of the `tfhub_module.pb` file. + +You can browse models in the TF1 Hub format on tfhub.dev by using the TF1 +version filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following +[this link](https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf1). + +Learn more about models in TF1 Hub format on TF Hub: + +* [Using TF1 Hub format models](tf1_hub_module.md) +* [Exporting a model in the TF1 Hub format](exporting_hub_format.md) +* [TF1/TF2 compatibility of TF1 Hub format](model_compatibility.md) + +## TFLite format + +The TFLite format is used for on-device inference. You can learn more at the +[TFLite documentation](https://www.tensorflow.org/lite). + +You can browse TF Lite models on tfhub.dev by using the TF Lite model format +filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following [this link](https://tfhub.dev/lite). + +## TFJS format + +The TF.js format is used for in-browser ML. You can learn more at the +[TF.js documentation](https://www.tensorflow.org/js). 
+ +You can browse TF.js models on tfhub.dev by using the TF.js model format filter +on the [tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) +or by following [this link](https://tfhub.dev/js). diff --git a/site/en/hub/overview.md b/site/en/hub/overview.md new file mode 100644 index 00000000000..b6d814eba73 --- /dev/null +++ b/site/en/hub/overview.md @@ -0,0 +1,31 @@ + +# TensorFlow Hub + +TensorFlow Hub is an open repository and library for reusable machine learning. +The [tfhub.dev](https://tfhub.dev) repository provides many pre-trained models: +text embeddings, image classification models, TF.js/TFLite models and much more. +The repository is open to +[community contributors](https://tfhub.dev/s?subtype=publisher). + +The [`tensorflow_hub`](https://github.com/tensorflow/hub) library lets you +download and reuse them in your TensorFlow program with a minimum amount of +code. + +```python +import tensorflow_hub as hub + +model = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2") +embeddings = model(["The rain in Spain.", "falls", + "mainly", "In the plain!"]) + +print(embeddings.shape) #(4,128) +``` + +## Next Steps + +- [Find models on tfhub.dev](https://tfhub.dev) +- [Publish models on tfhub.dev](publish.md) +- TensorFlow Hub library + - [Install TensorFlow Hub](installation.md) + - [Library overview](lib_overview.md) +- [Follow tutorials](tutorials) diff --git a/site/en/hub/portability_and_deletion.md b/site/en/hub/portability_and_deletion.md new file mode 100644 index 00000000000..67fa401d161 --- /dev/null +++ b/site/en/hub/portability_and_deletion.md @@ -0,0 +1,18 @@ + +## I want to see what I’ve uploaded to TensorFlow Hub. Can I get a copy of my data? + +Yes. If you’d like the Kaggle Team to **send you a copy** of all of the +data you have uploaded, please send us an email at [support@kaggle.com](mailto:support@kaggle.com) +and we’ll respond as soon as possible. + +## How do I delete what I’ve uploaded to TensorFlow Hub? + +Similarly, if you’d like us to **delete or remove content**, please send us an +email at [support@kaggle.com](mailto:support@kaggle.com) and we’ll delete +all copies that we have and stop serving it on tfhub.dev. Please note: + +* Because TensorFlow Hub is an open-source platform, copies of your assets may +still be retained by members of the public. +* Deletion is permanent and cannot be undone. +* Deletion can cause downstream breakages if users are not caching your model +locally and/or are not properly warned prior to deletion. diff --git a/site/en/hub/publish.md b/site/en/hub/publish.md new file mode 100644 index 00000000000..7fc5e7c1751 --- /dev/null +++ b/site/en/hub/publish.md @@ -0,0 +1,19 @@ + +# Publishing Process + +Thank you for considering to publish your models! 
+ +**Please join the Early Access Model Publishing (EAP) on +[Kaggle Models](https://www.kaggle.com/models):** + +- Email [kaggle-models@google.com](mailto:kaggle-models@google.com) and + provide the following to get access to EAP: + - (1) Your Kaggle username + - (2) Your desired organization slug + - (3) A URL to a square-shaped profile image (which is needed for the + organization creation) +- Follow the + [documentation instructions](https://www.kaggle.com/model-publishing-instructions) + to create and publish your model +- Feel free to raise any questions and get support from + [Kaggle Discord channel](https://discord.gg/rKEyxj9WF) diff --git a/site/en/hub/reusable_saved_models.md b/site/en/hub/reusable_saved_models.md new file mode 100644 index 00000000000..b2114135d77 --- /dev/null +++ b/site/en/hub/reusable_saved_models.md @@ -0,0 +1,208 @@ + +# Reusable SavedModels + +## Introduction + +TensorFlow Hub hosts SavedModels for TensorFlow 2, among other assets. +They can be loaded back into a Python program with `obj = hub.load(url)` +[[learn more](tf2_saved_model)]. The returned `obj` is the result +of `tf.saved_model.load()` (see TensorFlow's +[SavedModel guide](https://www.tensorflow.org/guide/saved_model)). +This object can have arbitrary attributes that are tf.functions, +tf.Variables (initialized from their pre-trained values), other resources +and, recursively, more such objects. + +This page describes an interface to be implemented by the loaded `obj` +in order to be *reused* in a TensorFlow Python program. +SavedModels conforming to this interface are called *Reusable SavedModels*. + +Reusing means building a larger model around `obj`, including the ability +to fine-tune it. Fine-tuning means further training of the weights in the loaded +`obj` as part of the surrounding model. The loss function and the +optimizer are determined by the surrounding model; `obj` only defines +the mapping of input to output activations (the "forward pass"), possibly +including techniques such as dropout or batch normalization. + +**The TensorFlow Hub team recommends implementing the Reusable SavedModel +interface** in all SavedModels that are meant to be reused in the above sense. +Many utilities from the `tensorflow_hub` library, notably `hub.KerasLayer`, +require SavedModels to implement it. + +### Relation to SignatureDefs + +This interface in terms of tf.functions and other TF2 features +is separate from the SavedModel's signatures, which have been +available since TF1 and continue to be used in TF2 for inference +(such as deploying SavedModels to TF Serving or TF Lite). +Signatures for inference are not expressive enough to support fine-tuning, +and [`tf.function`](https://www.tensorflow.org/api_docs/python/tf/function) +provides a more natural and expressive +[Python API](https://www.tensorflow.org/tutorials/customization/performance) +for the reused model. + +### Relation to model-building libraries + +A Reusable SavedModel uses only TensorFlow 2 primitives, independent of any +particular model-building library like Keras or Sonnet. This facilitates reuse +across model-building libraries, free from dependencies on the original +model-building code. + +Some amount of adaptation will be needed load Reusable SavedModels into or save +them from any given model-building library. 
For Keras, +[hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) +provides the loading, and Keras's built-in saving in the SavedModel format has +been redesigned for TF2 with the goal of providing a superset of this interface +(see the +[RFC](https://github.com/tensorflow/community/blob/master/rfcs/20190509-keras-saved-model.md) +from May 2019). + +### Relation to task-specific "Common SavedModel APIs" + +The interface definition on this page allows for any number and type of inputs +and outputs. The +[Common SavedModel APIs for TF Hub](common_saved_model_apis/index.md) refine +this general interface with usage conventions for specific tasks to make models +easily interchangeable. + +## Interface definition + +### Attributes + +A Reusable SavedModel is a TensorFlow 2 SavedModel such that +`obj = tf.saved_model.load(...)` returns an object that has the following +attributes + + * `__call__`. Required. A tf.function implementing the model's computation + (the "forward pass") subject to the specification below. + + * `variables`: A list of tf.Variable objects, listing all the variables + used by any possible invocation of `__call__`, including both + trainable and non-trainable ones. + + This list can be omitted if empty. + + Note: Conveniently, this name coincides with the attribute synthesized by + `tf.saved_model.load(...)` when loading a TF1 SavedModel to represent + its `GLOBAL_VARIABLES` collection. + + * `trainable_variables`: A list of tf.Variable objects such that + `v.trainable` is true for all elements. + These variables must be a subset of `variables`. + These are the variables to be trained when fine-tuning the object. + The SavedModel creator may choose to omit some variables here that were + originally trainable to indicate that these should not be modified during + fine-tuning. + + This list can be omitted if empty, in particular, if the SavedModel does not + support fine-tuning. + + * `regularization_losses`: A list of tf.functions, each taking zero inputs + and returning a single scalar float tensor. For fine-tuning, the + SavedModel user is advised to include these as additional regularization + terms into the loss (in the simplest case without further scaling). + Typically, these are used to represent weight regularizers. + (For lack of inputs, these tf.functions cannot express + activity regularizers.) + + This list can be omitted if empty, in particular, if the SavedModel does not + support fine-tuning or does not wish to prescribe weight regularization. + +### The `__call__` function + +A Restored SavedModel `obj` has an `obj.__call__` attribute that is +a restored tf.function and allows `obj` to be called as follows. + +Synopsis (pseudo-code): + +```python +outputs = obj(inputs, trainable=..., **kwargs) +``` + +#### Arguments + +The arguments are as follows. + + * There is one positional, required argument with a batch of input activations + of the SavedModel. Its type is one of + + * a single Tensor for a single input, + * a list of Tensors for an ordered sequence of unnamed inputs, + * a dict of Tensors keyed by a particular set of input names. + + (Future revisions of this interface may allow more general nests.) + The SavedModel creator chooses one of those and the tensor shapes + and dtypes. Where useful, some dimensions of the shape should be + undefined (notably batch size). + + * There may be an optional keyword argument `training` that accepts a Python + boolean, `True` or `False`. The default is `False`. 
+ If the model supports fine-tuning, and if its computation differs between + the two (e.g., as in dropout and batch normalization), that distinction + is implemented with this argument. Otherwise, this argument may be absent. + + It is not required that `__call__` accept a Tensor-valued `training` + argument. It falls on the caller to use `tf.cond()` if necessary + to dispatch between them. + + * The SavedModel creator may choose to accept more optional `kwargs` + of particular names. + + * For Tensor-valued arguments, the SavedModel creator defines their + permissible dtypes and shapes. `tf.function` accepts a Python default + value on an argument that is traced with a tf.TensorSpec input. + Such arguments can be used to allow customization of numeric + hyperparameters involved in `__call__` (e.g., dropout rate). + + * For Python-valued arguments, the SavedModel creator defines their + permissible values. Such arguments can be used as flags to make + discrete choices in the traced function (but mind the combinatorial + explosion of traces). + +The restored `__call__` function must provide traces for all permissible +combinations of arguments. Flipping `training` between `True` and `False` +must not change the permissibility of arguments. + +#### Result + +The `outputs` from calling `obj` can be + + * a single Tensor for a single output, + * a list of Tensors for an ordered sequence of unnamed outputs, + * a dict of Tensors keyed by a particular set of output names. + +(Future revisions of this interface may allow more general nests.) +The return type may vary depending on the Python-valued kwargs. +This allows for flags producing extra outputs. +The SavedModel creator defines the output dtypes and shapes and their +dependency on inputs. + + +### Named callables + +A Reusable SavedModel can provide multiple model pieces in the way +described above by putting them into named subobjects, for example, +`obj.foo`, `obj.bar` and so on. +Each subobject provides a `__call__` method and supporting attributes +about the variables etc. specific to that model piece. +For the example above, there would be `obj.foo.__call__`, +`obj.foo.variables` and so on. + +Note that this interface does *not* cover the approach of adding +a bare tf.function directly as `tf.foo`. + +Users of Reusable SavedModels are only expected to handle one level of nesting +(`obj.bar` but not `obj.bar.baz`). (Future revisions of this interface may allow +deeper nesting, and may waive the requirement that the top-level object be +callable itself.) + +## Closing remarks + +### Relation to in-process APIs + +This document describes an interface of a Python class which consists +of primitives like tf.function and tf.Variable that survive a +round-trip through serialization via `tf.saved_model.save()` +and `tf.saved_model.load()`. However, the interface was already present +on the original object that was passed to `tf.saved_model.save()`. +Adaptation to that interface enables the exchange of model pieces +across model-building APIs within a single TensorFlow program. diff --git a/site/en/hub/tf1_hub_module.md b/site/en/hub/tf1_hub_module.md new file mode 100644 index 00000000000..7601878dc1b --- /dev/null +++ b/site/en/hub/tf1_hub_module.md @@ -0,0 +1,198 @@ + +# TF1 Hub format + +At its launch in 2018, TensorFlow Hub offered a single type of asset: TF1 Hub +format for import into TensorFlow 1 programs. 
+ +This page explains how to use TF1 Hub format in TF1 (or the TF1 compatibility +mode of TF2) with the `hub.Module` class and associated APIs. (The typical use +is to build a `tf.Graph`, possibly inside a TF1 `Estimator`, by combining one or +more models in TF1 Hub format with `tf.compat.layers` or `tf.layers`). + +Users of TensorFlow 2 (outside TF1 compatibility mode) must use +[the new API with `hub.load()` or `hub.KerasLayer`](tf2_saved_model.md). The new +API loads the new TF2 SavedModel asset type, but also has limited +[support for loading TF1 Hub format into TF2](migration_tf2.md). + +## Using a model in TF1 Hub format + +### Instantiating a model in TF1 Hub format + +A model in TF1 Hub format is imported into a TensorFlow program by creating a +`hub.Module` object from a string with its URL or filesystem path, such as: + +```python +m = hub.Module("path/to/a/module_dir") +``` +**Note:** See more information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +This adds the module's variables to the current TensorFlow graph. +Running their initializers will read their pre-trained values from disk. +Likewise, tables and other state is added to the graph. + +### Caching Modules + +When creating a module from a URL, the module content is downloaded and cached +in the local system temporary directory. The location where modules are cached +can be overridden using `TFHUB_CACHE_DIR` environment variable. For details, see +[Caching](caching.md). + +### Applying a Module + +Once instantiated, a module `m` can be called zero or more times like a Python +function from tensor inputs to tensor outputs: + +```python +y = m(x) +``` + +Each such call adds operations to the current TensorFlow graph to compute +`y` from `x`. If this involves variables with trained weights, these are +shared between all applications. + +Modules can define multiple named *signatures* in order to allow being applied +in more than one way (similar to how Python objects have *methods*). +A module's documentation should describe the available +signatures. The call above applies the signature named `"default"`. Any +signature can be selected by passing its name to the optional `signature=` +argument. + +If a signature has multiple inputs, they must be passed as a dict, with the keys +defined by the signature. Likewise, if a signature has multiple outputs, these +can be retrieved as a dict by passing `as_dict=True`, under the keys defined by +the signature (the key `"default"` is for the single output returned if +`as_dict=False`). So the most general form of applying a Module looks like: + +```python +outputs = m(dict(apples=x1, oranges=x2), signature="fruit_to_pet", as_dict=True) +y1 = outputs["cats"] +y2 = outputs["dogs"] +``` + +A caller must supply all inputs defined by a signature, but there is no +requirement to use all of a module's outputs. +TensorFlow will run only those parts of the module that end up +as dependencies of a target in `tf.Session.run()`. Indeed, module publishers may +choose to provide various outputs for advanced uses (like activations of +intermediate layers) along with the main outputs. Module consumers should +handle additional outputs gracefully. + +### Trying out alternative modules + +Whenever there are multiple modules for the same task, TensorFlow Hub +encourages to equip them with compatible signatures (interfaces) +such that trying different ones is as easy as varying the module handle +as a string-valued hyperparameter. 
+ +To this end, we maintain a collection of recommended +[Common Signatures](common_signatures/index.md) for popular tasks. + + +## Creating a New Module + +### Compatibility note + +The TF1 Hub format is geared towards TensorFlow 1. It is only partially +supported by TF Hub in TensorFlow 2. Please do consider publishing in the new +[TF2 SavedModel](tf2_saved_model.md) format instead. + +The TF1 Hub format is similar to the SavedModel format of TensorFlow 1 on a +syntactic level (same file names and protocol messages) but semantically +different to allow for module reuse, composition and re-training (e.g., +different storage of resource initializers, different tagging conventions for +metagraphs). The easiest way to tell them apart on disk is the presence or +absence of the `tfhub_module.pb` file. + +### General approach + +To define a new module, a publisher calls `hub.create_module_spec()` with a +function `module_fn`. This function constructs a graph representing the module's +internal structure, using `tf.placeholder()` for inputs to be supplied by +the caller. Then it defines signatures by calling +`hub.add_signature(name, inputs, outputs)` one or more times. + +For example: + +```python +def module_fn(): + inputs = tf.placeholder(dtype=tf.float32, shape=[None, 50]) + layer1 = tf.layers.dense(inputs, 200) + layer2 = tf.layers.dense(layer1, 100) + outputs = dict(default=layer2, hidden_activations=layer1) + # Add default signature. + hub.add_signature(inputs=inputs, outputs=outputs) + +... +spec = hub.create_module_spec(module_fn) +``` + +The result of `hub.create_module_spec()` can be used, instead of a path, +to instantiate a module object within a particular TensorFlow graph. In +such case, there is no checkpoint, and the module instance will use the +variable initializers instead. + +Any module instance can be serialized to disk via its `export(path, session)` +method. Exporting a module serializes its definition together with the current +state of its variables in `session` into the passed path. This can be used +when exporting a module for the first time, as well as when exporting a fine +tuned module. + +For compatibility with TensorFlow Estimators, `hub.LatestModuleExporter` exports +modules from the latest checkpoint, just like `tf.estimator.LatestExporter` +exports the entire model from the latest checkpoint. + +Module publishers should implement a [common +signature](common_signatures/index.md) when possible, so that consumers can +easily exchange modules and find the best one for their problem. + +### Real example + +Take a look at our [text embedding module exporter](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings/export.py) +for a real-world example of how to create a module from a common text embedding +format. + + +## Fine-Tuning + +Training the variables of an imported module together with those of the model +around it is called *fine-tuning*. Fine-tuning can result in better quality, but +adds new complications. We advise consumers to look into fine-tuning only after +exploring simpler quality tweaks, and only if the module publisher recommends +it. + +### For Consumers + +To enable fine-tuning, instantiate the module with +`hub.Module(..., trainable=True)` to make its variables trainable and +import TensorFlow's `REGULARIZATION_LOSSES`. If the module has multiple +graph variants, make sure to pick the one appropriate for training. +Usually, that's the one with tags `{"train"}`. 
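
A hedged sketch of what this can look like in a TF1 training graph (the handle, tensors, and optimizer settings are placeholders, not recommendations):

```python
# Sketch only: fine-tune a TF1 Hub format text-embedding module inside a TF1 graph.
m = hub.Module(handle, trainable=True, tags={"train"})
embeddings = m(sentences)                        # sentences: a string Tensor
logits = tf.layers.dense(embeddings, 2)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
loss += tf.losses.get_regularization_loss()      # picks up REGULARIZATION_LOSSES
train_op = tf.train.AdamOptimizer(1e-5).minimize(loss)
```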
+ +Choose a training regime that does not ruin the pre-trained weights, +for example, a lower learning rate than for training from scratch. + +### For Publishers + +To make fine-tuning easier for consumers, please be mindful of the following: + +* Fine-tuning needs regularization. Your module is exported with the + `REGULARIZATION_LOSSES` collection, which is what puts your choice of + `tf.layers.dense(..., kernel_regularizer=...)` etc. into what the consumer + gets from `tf.losses.get_regularization_losses()`. Prefer this way of + defining L1/L2 regularization losses. + +* In the publisher model, avoid defining L1/L2 regularization via the `l1_` + and `l2_regularization_strength` parameters of `tf.train.FtrlOptimizer`, + `tf.train.ProximalGradientDescentOptimizer`, and other proximal optimizers. + These are not exported alongside the module, and setting regularization + strengths globally may not be appropriate for the consumer. Except for L1 + regularization in wide (i.e. sparse linear) or wide & deep models, it should + be possible to use individual regularization losses instead. + +* If you use dropout, batch normalization, or similar training techniques, set + their hyperparameters to values that make sense across many expected uses. + The dropout rate may have to be adjusted to the target problem's propensity + to overfitting. In batch normalization, the momentum (a.k.a. decay + coefficient) should be small enough to enable fine-tuning with small + datasets and/or large batches. For advanced consumers, consider adding a + signature that exposes control over critical hyperparameters. diff --git a/site/en/hub/tf2_saved_model.md b/site/en/hub/tf2_saved_model.md new file mode 100644 index 00000000000..e41337b2548 --- /dev/null +++ b/site/en/hub/tf2_saved_model.md @@ -0,0 +1,289 @@ + +# SavedModels from TF Hub in TensorFlow 2 + +The +[SavedModel format of TensorFlow 2](https://www.tensorflow.org/guide/saved_model) +is the recommended way to share pre-trained models and model pieces on +TensorFlow Hub. It replaces the older [TF1 Hub format](tf1_hub_module.md) and +comes with a new set of APIs. + +This page explains how to reuse TF2 SavedModels in a TensorFlow 2 program with +the low-level `hub.load()` API and its `hub.KerasLayer` wrapper. (Typically, +`hub.KerasLayer` is combined with other `tf.keras.layers` to build a Keras model +or the `model_fn` of a TF2 Estimator.) These APIs can also load the legacy +models in TF1 Hub format, within limits, see the +[compatibility guide](model_compatibility.md). + +Users of TensorFlow 1 can update to TF 1.15 and then use the same APIs. +Older versions of TF1 do not work. + +## Using SavedModels from TF Hub + +### Using a SavedModel in Keras + +[Keras](https://www.tensorflow.org/guide/keras/) is TensorFlow's high-level API +for building deep learning models by composing Keras Layer objects. +The `tensorflow_hub` library provides the class `hub.KerasLayer` that gets +initialized with the URL (or filesystem path) of a SavedModel and then +provides the computation from the SavedModel, including its pre-trained +weights. 
+ +Here is an example of using a pre-trained text embedding: + +```python +import tensorflow as tf +import tensorflow_hub as hub + +hub_url = "https://tfhub.dev/google/nnlm-en-dim128/2" +embed = hub.KerasLayer(hub_url) +embeddings = embed(["A long sentence.", "single-word", "http://example.com"]) +print(embeddings.shape, embeddings.dtype) +``` + +From this, a text classifier can be built in the usual Keras way: + +```python +model = tf.keras.Sequential([ + embed, + tf.keras.layers.Dense(16, activation="relu"), + tf.keras.layers.Dense(1, activation="sigmoid"), +]) +``` + +The [Text classification +colab](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) +is a complete example how to train and evaluate such a classifier. + +The model weights in a `hub.KerasLayer` are set to non-trainable by default. +See the section on fine-tuning below for how to change that. Weights are +shared between all applications of the same layer object, as usual in Keras. + + +### Using a SavedModel in an Estimator + +Users of TensorFlow's +[Estimator](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_estimator) +API for distributed training can use SavedModels from TF Hub by +writing their `model_fn` in terms of `hub.KerasLayer` among other +`tf.keras.layers`. + + +### Behind the scenes: SavedModel downloading and caching + +Using a SavedModel from TensorFlow Hub (or other HTTPS servers that implement +its [hosting](hosting.md) protocol) downloads and decompresses it to the local +filesystem if not already present. The environment variable `TFHUB_CACHE_DIR` +can be set to override the default temporary location for caching the downloaded +and uncompressed SavedModels. For details, see [Caching](caching.md). + +### Using a SavedModel in low-level TensorFlow +#### Model Handles + +SavedModels can be loaded from a specified `handle`, where the `handle` is a +filesystem path, valid TFhub.dev model URL (e.g. "https://tfhub.dev/..."). +Kaggle Models URLs mirror TFhub.dev handles in accordance with our Terms and the +license associated with the model assets, e.g., "https://www.kaggle.com/...". +Handles from Kaggle Models are equivalent to their corresponding TFhub.dev +handle. + +The function `hub.load(handle)` downloads and decompresses a SavedModel +(unless `handle` is already a filesystem path) and then returns the result +of loading it with TensorFlow's built-in function `tf.saved_model.load()`. +Therefore, `hub.load()` can handle any valid SavedModel (unlike its +predecessor `hub.Module` for TF1). + +#### Advanced topic: what to expect from the SavedModel after loading + +Depending on the contents of the SavedModel, the result of +`obj = hub.load(...)` can be invoked in various ways (as explained in +much greater detail in TensorFlow's [SavedModel +Guide](https://www.tensorflow.org/guide/saved_model): + + * The serving signatures of the SavedModel (if any) are represented as a + dictionary of concrete functions and can be called like + `tensors_out = obj.signatures["serving_default"](**tensors_in)`, + with dictionaries of tensors keyed by the respective input and output + names and subject to the signature's shape and dtype constraints. 
+ + * The + [`@tf.function`](https://www.tensorflow.org/api_docs/python/tf/function)-decorated + methods of the saved object (if any) are restored as tf.function objects + that can be called by all combinations of Tensor and non-Tensor arguments + for which the tf.function had been + [traced](https://www.tensorflow.org/tutorials/customization/performance#tracing) + prior to saving. In particular, if there is an `obj.__call__` method + with suitable traces, `obj` itself can be called like a Python function. + A simple example could look like + `output_tensor = obj(input_tensor, training=False)`. + +This leaves enormous liberty in the interfaces that SavedModels can +implement. The [Reusable SavedModels interface](reusable_saved_models.md) +for `obj` establishes conventions such that client code, including adapters +like `hub.KerasLayer`, know how to use the SavedModel. + +Some SavedModels may not follow that convention, especially whole models +not meant to be reused in larger models, and just provide serving signatures. + +The trainable variables in a SavedModel are reloaded as trainable, +and `tf.GradientTape` will watch them by default. See the section on +fine-tuning below for some caveats, and consider avoiding this for starters. +Even if you want to fine-tune, you may want to see if `obj.trainable_variables` +advises to re-train only a subset of the originally trainable variables. + + +## Creating SavedModels for TF Hub + +### Overview + +SavedModel is TensorFlow's standard serialization format for trained models +or model pieces. +It stores the model's trained weights together with the exact TensorFlow +operations to perform its computation. It can be used independently from +the code that created it. In particular, it can be reused across different +high-level model-building APIs like Keras, because TensorFlow operations +are their common basic language. + +### Saving from Keras + +Starting with TensorFlow 2, `tf.keras.Model.save()` and +`tf.keras.models.save_model()` default to the SavedModel format (not HDF5). +The resulting SavedModels that can be used with `hub.load()`, +`hub.KerasLayer` and similar adapters for other high-level APIs +as they become available. + +To share a complete Keras Model, just save it with `include_optimizer=False`. + +To share a piece of a Keras Model, make the piece a Model in itself and then +save that. You can either lay out the code like that from the start.... + +```python +piece_to_share = tf.keras.Model(...) +full_model = tf.keras.Sequential([piece_to_share, ...]) +full_model.fit(...) +piece_to_share.save(...) +``` + +...or cut out the piece to share after the fact (if it aligns with the +layering of your full model): + +```python +full_model = tf.keras.Model(...) 
+sharing_input = full_model.get_layer(...).get_output_at(0) +sharing_output = full_model.get_layer(...).get_output_at(0) +piece_to_share = tf.keras.Model(sharing_input, sharing_output) +piece_to_share.save(..., include_optimizer=False) +``` + +[TensorFlow Models](https://github.com/tensorflow/models) on GitHub uses the +former approach for BERT (see +[nlp/tools/export_tfhub_lib.py](https://github.com/tensorflow/models/blob/master/official/nlp/tools/export_tfhub_lib.py), +note the split between `core_model` for export and the `pretrainer` for +restoring the checkpoint) and the latter approach for ResNet (see +[legacy/image_classification/tfhub_export.py](https://github.com/tensorflow/models/blob/master/official/legacy/image_classification/resnet/tfhub_export.py)). + +### Saving from low-level TensorFlow + +This requires good familiarity with TensorFlow's [SavedModel +Guide](https://www.tensorflow.org/guide/saved_model). + +If you want to provide more than just a serving signature, you should +implement the [Reusable SavedModel interface](reusable_saved_models.md). +Conceptually, this looks like + +```python +class MyMulModel(tf.train.Checkpoint): + def __init__(self, v_init): + super().__init__() + self.v = tf.Variable(v_init) + self.variables = [self.v] + self.trainable_variables = [self.v] + self.regularization_losses = [ + tf.function(input_signature=[])(lambda: 0.001 * self.v**2), + ] + + @tf.function(input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)]) + def __call__(self, inputs): + return tf.multiply(inputs, self.v) + +tf.saved_model.save(MyMulModel(2.0), "/tmp/my_mul") + +layer = hub.KerasLayer("/tmp/my_mul") +print(layer([10., 20.])) # [20., 40.] +layer.trainable = True +print(layer.trainable_weights) # [2.] +print(layer.losses) # 0.004 +``` + + +## Fine-Tuning + +Training the already-trained variables of an imported SavedModel together with +those of the model around it is called *fine-tuning* the SavedModel. +This can result in better quality, but often makes the training more +demanding (may take more time, depend more on the optimizer and its +hyperparameters, increase the risk of overfitting and require dataset +augmentation, esp. for CNNs). We advise SavedModel consumers to look into +fine-tuning only after having established a good training regime, +and only if the SavedModel publisher recommends it. + +Fine-tuning changes the "continuous" model parameters that are trained. +It does not change hard-coded transformations, such as tokenizing text +input and mapping tokens to their corresponding entries in an embedding matrix. + +### For SavedModel consumers + +Creating a `hub.KerasLayer` like + +```python +layer = hub.KerasLayer(..., trainable=True) +``` + +enables fine-tuning of the SavedModel loaded by the layer. It adds the +trainable weights and weight regularizers declared in the SavedModel +to the Keras model, and runs the SavedModel's computation in training +mode (think of dropout etc.). + +The [image classification +colab](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) +contains an end-to-end example with optional fine-tuning. + +#### Re-exporting the fine-tuning result + +Advanced users may want to save the results of fine-tuning back into +a SavedModel that can be used instead of the originally loaded one. 
+This can be done with code like:
+
+```python
+loaded_obj = hub.load("https://tfhub.dev/...")
+hub_layer = hub.KerasLayer(loaded_obj, trainable=True, ...)
+
+model = tf.keras.Sequential([..., hub_layer, ...])
+model.compile(...)
+model.fit(...)
+
+export_module_dir = os.path.join(os.getcwd(), "finetuned_model_export")
+tf.saved_model.save(loaded_obj, export_module_dir)
+```
+
+### For SavedModel creators
+
+When creating a SavedModel for sharing on TensorFlow Hub,
+think ahead about whether and how its consumers should fine-tune it,
+and provide guidance in the documentation.
+
+Saving from a Keras Model should make all the mechanics of fine-tuning work
+(saving weight regularization losses, declaring trainable variables, tracing
+`__call__` for both `training=True` and `training=False`, etc.).
+
+Choose a model interface that plays well with gradient flow,
+e.g., output logits instead of softmax probabilities or top-k predictions.
+
+If the model uses dropout, batch normalization, or similar training techniques
+that involve hyperparameters, set them to values that make sense across many
+expected target problems and batch sizes. (As of this writing, saving from
+Keras does not make it easy to let consumers adjust them.)
+
+Weight regularizers on individual layers are saved (with their regularization
+strength coefficients), but weight regularization from within the optimizer
+(like `tf.keras.optimizers.Ftrl(l1_regularization_strength=...)`)
+is lost. Advise consumers of your SavedModel accordingly.
diff --git a/site/en/hub/tutorials/_index.yaml b/site/en/hub/tutorials/_index.yaml
new file mode 100644
index 00000000000..deb98108393
--- /dev/null
+++ b/site/en/hub/tutorials/_index.yaml
@@ -0,0 +1,174 @@
+book_path: /hub/_book.yaml
+project_path: /hub/_project.yaml
+title: Tutorials
+landing_page:
+  custom_css_path: /site-assets/css/style.css
+  nav: left
+  meta_tags:
+  - name: description
+    content: >
+      TensorFlow Hub tutorials to help you get started with using and adapting pre-trained
+      machine learning models to your needs.
+  rows:
+    # Getting started
+    - classname: devsite-landing-row-100
+      items:
+      - description: >
+

TensorFlow Hub is a comprehensive repository of pre-trained + models ready for fine-tuning and deployable anywhere. Download the latest trained models + with a minimal amount of code with the tensorflow_hub library.

+

The following tutorials should help you get
+          started with using and applying models from TF Hub for your needs. Interactive tutorials let you
+          modify them and execute them with your changes. Click the Run in Google Colab
+          button at the top of an interactive tutorial to tinker with it.

+ + # For beginners + - classname: devsite-landing-row-100 + items: + - description: > + +

If you are unfamiliar with machine learning and TensorFlow, you can start by getting
+          an overview of how to classify images and text, detect objects in images, or stylize your own pictures like famous artwork:

+
+    - classname: devsite-landing-row-100
+      items:
+      - classname: tfo-landing-page-card
+        description: >
+
+          Build a Keras model on top of a pre-trained image classifier to distinguish flowers.
+        path: /hub/tutorials/tf2_image_retraining
+        image_path: /hub/images/image_classification.png
+      - classname: tfo-landing-page-card
+        description: >
+
+          Use BERT to build a Keras model to solve a text classification sentiment analysis task.
+        path: /tutorials/text/classify_text_with_bert
+        image_path: /hub/images/bert_preprocess.png
+      - classname: tfo-landing-page-card
+        description: >
+
+          Let a neural network redraw an image in the style of Picasso, van Gogh, or your own style image.
+        path: /hub/tutorials/tf2_arbitrary_image_stylization
+        image_path: /hub/images/style_transfer.png
+      - classname: tfo-landing-page-card
+        description: >
+
+          Detect objects in images using models like FasterRCNN or SSD.
+        path: /hub/tutorials/tf2_object_detection
+        image_path: /hub/images/object_detection.png

Check out more advanced tutorials for how to use NLP, images, audio, and video models from TensorFlow Hub.

+ + # NLP tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Solve common NLP tasks with models from TensorFlow Hub. View all available NLP tutorials in the left nav.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Classify and semantically compare sentences with the Universal Sentence Encoder. + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder + image_path: /hub/images/similarity.png + - classname: tfo-landing-page-card + description: > + + Use BERT to solve GLUE benchmark tasks running on TPU. + path: /tutorials/text/solve_glue_tasks_using_bert_on_tpu + image_path: /hub/images/bert.png + - classname: tfo-landing-page-card + description: > + + Answer cross-lingual questions from the SQuAD dataset using the multilingual universal sentence encoder Q&A model. + path: /hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa + image_path: /hub/images/colab_logo.svg + + # Image tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Explore how to use GANs, super resolution models and more. View all available image tutorials in the left nav.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Generate artificial faces and interpolate between them using GANs. + path: /hub/tutorials/tf_hub_generative_image_module + image_path: /hub/images/gan_faces.gif + - classname: tfo-landing-page-card + description: > + + Enhance the resolution of downsampled images. + path: /hub/tutorials/image_enhancing + image_path: /hub/images/super_resolution.png + - classname: tfo-landing-page-card + description: > + + Fill the masked part of given images. + path: /hub/tutorials/boundless + image_path: /hub/images/boundless.png + + # Audio tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Explore tutorials using trained models for audio data including pitch recognition and sound classification.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Record yourself singing and detect the pitch of your voice using the SPICE model. + path: /hub/tutorials/spice + image_path: /hub/images/spice_color.png + - classname: tfo-landing-page-card + description: > + + Use the YAMNet model to classify sounds as 521 audio event classes from the AudioSet-YouTube corpus. + path: /hub/tutorials/yamnet + image_path: /hub/images/yamnet.png + + # Video tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Try out trained ML models for video data for action recognition, video interpolation, and more.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Detect one of 400 actions in a video using the Inflated 3D ConvNet model. + path: /hub/tutorials/action_recognition_with_tf_hub + image_path: /hub/images/action_recognition.gif + - classname: tfo-landing-page-card + description: > + + Interpolate between video frames using Inbetweening with 3D Convolutions. + path: /hub/tutorials/tweening_conv3d + image_path: /hub/images/interpolation.png + - classname: tfo-landing-page-card + description: > + + Find videos that are the most related to a text query. + path: /hub/tutorials/text_to_video_retrieval_with_s3d_milnce + image_path: /hub/images/text_video.gif diff --git a/site/en/hub/tutorials/_toc.yaml b/site/en/hub/tutorials/_toc.yaml new file mode 100644 index 00000000000..04d95a267d7 --- /dev/null +++ b/site/en/hub/tutorials/_toc.yaml @@ -0,0 +1,118 @@ +toc: +- heading: "Getting started" + style: divider +- title: Overview + path: /hub/tutorials/_index.yaml + +- heading: "NLP Tutorials" + style: divider +- title: Text classification + path: /hub/tutorials/tf2_text_classification +- title: Classify text with BERT + path: /tutorials/text/classify_text_with_bert + status: external +- title: BERT on TPU + path: /tutorials/text/solve_glue_tasks_using_bert_on_tpu + status: external +- title: Real-time semantic search + path: /hub/tutorials/tf2_semantic_approximate_nearest_neighbors +- title: Multilingual question answering + path: /hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa +- title: "Additional NLP tutorials" + style: accordion + section: + - title: BERT Experts + path: /hub/tutorials/bert_experts + - title: Semantic similarity + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder + - title: Text classification on Kaggle + path: /hub/tutorials/text_classification_with_tf_hub_on_kaggle + - title: Bangla article classifier + path: /hub/tutorials/bangla_article_classifier + - title: Explore CORD-19 text embeddings + path: /hub/tutorials/cord_19_embeddings_keras + - title: Multilingual universal sentence encoder + path: /hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder + - title: Text cookbook + path: /hub/tutorials/text_cookbook + - title: SentEval for Universal Sentence Encoder CMLM model. 
+ path: /hub/tutorials/senteval_for_universal_sentence_encoder_cmlm + +- heading: "Image Tutorials" + style: divider +- title: Image classification + path: /hub/tutorials/image_classification +- title: Transfer Learning for Image classification + path: /hub/tutorials/tf2_image_retraining +- title: Style transfer + path: /hub/tutorials/tf2_arbitrary_image_stylization +- title: Large-scale image retrieval with DELF + path: /hub/tutorials/tf_hub_delf_module +- title: Object detection + path: /hub/tutorials/tf2_object_detection +- title: GANs for image generation + path: /hub/tutorials/tf_hub_generative_image_module +- title: Human Pose Estimation + path: /hub/tutorials/movenet +- title: "Additional image tutorials" + style: accordion + section: + - title: "CropNet: Cassava Disease Detection" + path: /hub/tutorials/cropnet_cassava + - title: "CropNet: Fine tuning models for on-device inference" + path: /hub/tutorials/cropnet_on_device + - title: Boundless GAN + path: /hub/tutorials/boundless + - title: Super resolution + path: /hub/tutorials/image_enhancing + - title: HRNet model inference for semantic segmentation + path: /hub/tutorials/hrnet_semantic_segmentation + status: new + +- heading: "Audio Tutorials" + style: divider +- title: Pitch recognition + path: /hub/tutorials/spice +- title: Sound classification + path: /hub/tutorials/yamnet +- title: Automatic speech recognition with Wav2Vec2 + path: /hub/tutorials/wav2vec2_saved_model_finetuning + +- heading: "Video Tutorials" + style: divider +- title: Frame interpolation with FILM + path: /hub/tutorials/tf_hub_film_example + status: new +- title: Action recognition + path: /hub/tutorials/action_recognition_with_tf_hub +- title: Streaming action recognition + path: /hub/tutorials/movinet +- title: Video interpolation + path: /hub/tutorials/tweening_conv3d +- title: Text-to-video retrieval + path: /hub/tutorials/text_to_video_retrieval_with_s3d_milnce + +- title: "Tutorials (TF1)" + style: accordion + status: deprecated + section: + - heading: "Image Tutorials" + - title: Image classification + path: /hub/tutorials/image_feature_vector + - title: Object detection + path: /hub/tutorials/object_detection + - title: BigGAN image generation + path: /hub/tutorials/biggan_generation_with_tf_hub + - title: BigBiGAN image generation + path: /hub/tutorials/bigbigan_with_tf_hub + - title: S3 GAN image generation + path: /hub/tutorials/s3gan_generation_with_tf_hub + - heading: "NLP Tutorials" + - title: Semantic similarity lite + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite + - title: Nearest neighbor index for real-time semantic search + path: /hub/tutorials/semantic_approximate_nearest_neighbors + - title: Explore CORD-19 text embeddings + path: /hub/tutorials/cord_19_embeddings + - title: Wiki40B Language Models + path: /hub/tutorials/wiki40b_lm diff --git a/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb b/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb new file mode 100644 index 00000000000..3f586991ba9 --- /dev/null +++ b/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "x8Q7Un821X1A" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1W4rIAFt1Ui3" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The 
TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cDq0CIKc1vO_" + }, + "source": [ + "# Action Recognition with an Inflated 3D CNN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h6W3FhoP3TxC" + }, + "source": [ + "This Colab demonstrates recognizing actions in video data using the\n", + "[tfhub.dev/deepmind/i3d-kinetics-400/1](https://tfhub.dev/deepmind/i3d-kinetics-400/1) module. More models to detect actions in videos can be found [here](https://tfhub.dev/s?module-type=video-classification).\n", + "\n", + "The underlying model is described in the paper \"[Quo Vadis, Action Recognition? A New\n", + "Model and the Kinetics Dataset](https://arxiv.org/abs/1705.07750)\" by Joao\n", + "Carreira and Andrew Zisserman. The paper was posted on arXiv in May 2017, and\n", + "was published as a CVPR 2017 conference paper.\n", + "The source code is publicly available on\n", + "[github](https://github.com/deepmind/kinetics-i3d).\n", + "\n", + "\"Quo Vadis\" introduced a new architecture for video classification, the Inflated\n", + "3D Convnet or I3D. This architecture achieved state-of-the-art results on the UCF101\n", + "and HMDB51 datasets from fine-tuning these models. I3D models pre-trained on Kinetics\n", + "also placed first in the CVPR 2017 [Charades challenge](http://vuchallenge.org/charades.html).\n", + "\n", + "The original module was trained on the [kinetics-400 dateset](https://www.deepmind.com/open-source/kinetics)\n", + "and knows about 400 different actions.\n", + "Labels for these actions can be found in the\n", + "[label map file](https://github.com/deepmind/kinetics-i3d/blob/master/data/label_map.txt).\n", + "\n", + "In this Colab we will use it recognize activities in videos from a UCF101 dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R_0xc2jyNGRp" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mOHMWsFnITdi" + }, + "outputs": [], + "source": [ + "!pip install -q imageio\n", + "!pip install -q opencv-python\n", + "!pip install -q git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "USf0UvkYIlKo" + }, + "outputs": [], + "source": [ + "#@title Import the necessary modules\n", + "# TensorFlow and TF-Hub modules.\n", + "from absl import logging\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "# Some modules to help with reading the UCF101 dataset.\n", + "import random\n", + "import re\n", + "import os\n", + "import tempfile\n", + "import ssl\n", + "import cv2\n", + "import numpy as np\n", + "\n", + "# Some modules to display an animation using imageio.\n", + "import imageio\n", + "from IPython import display\n", + "\n", + "from urllib import request # requires python3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "IuMMS3TGdws7" + }, + "outputs": [], + "source": [ + "#@title Helper functions for the UCF101 dataset\n", + "\n", + "# Utilities to fetch videos from UCF101 dataset\n", + "UCF_ROOT = \"https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101/\"\n", + "_VIDEO_LIST = None\n", + "_CACHE_DIR = tempfile.mkdtemp()\n", + "# As of July 2020, crcv.ucf.edu doesn't use a certificate accepted by the\n", + "# default Colab environment anymore.\n", + "unverified_context = ssl._create_unverified_context()\n", + "\n", + "def list_ucf_videos():\n", + " \"\"\"Lists videos available 
in UCF101 dataset.\"\"\"\n", + " global _VIDEO_LIST\n", + " if not _VIDEO_LIST:\n", + " index = request.urlopen(UCF_ROOT, context=unverified_context).read().decode(\"utf-8\")\n", + " videos = re.findall(\"(v_[\\w_]+\\.avi)\", index)\n", + " _VIDEO_LIST = sorted(set(videos))\n", + " return list(_VIDEO_LIST)\n", + "\n", + "def fetch_ucf_video(video):\n", + " \"\"\"Fetches a video and cache into local filesystem.\"\"\"\n", + " cache_path = os.path.join(_CACHE_DIR, video)\n", + " if not os.path.exists(cache_path):\n", + " urlpath = request.urljoin(UCF_ROOT, video)\n", + " print(\"Fetching %s => %s\" % (urlpath, cache_path))\n", + " data = request.urlopen(urlpath, context=unverified_context).read()\n", + " open(cache_path, \"wb\").write(data)\n", + " return cache_path\n", + "\n", + "# Utilities to open video files using CV2\n", + "def crop_center_square(frame):\n", + " y, x = frame.shape[0:2]\n", + " min_dim = min(y, x)\n", + " start_x = (x // 2) - (min_dim // 2)\n", + " start_y = (y // 2) - (min_dim // 2)\n", + " return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]\n", + "\n", + "def load_video(path, max_frames=0, resize=(224, 224)):\n", + " cap = cv2.VideoCapture(path)\n", + " frames = []\n", + " try:\n", + " while True:\n", + " ret, frame = cap.read()\n", + " if not ret:\n", + " break\n", + " frame = crop_center_square(frame)\n", + " frame = cv2.resize(frame, resize)\n", + " frame = frame[:, :, [2, 1, 0]]\n", + " frames.append(frame)\n", + " \n", + " if len(frames) == max_frames:\n", + " break\n", + " finally:\n", + " cap.release()\n", + " return np.array(frames) / 255.0\n", + "\n", + "def to_gif(images):\n", + " converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)\n", + " imageio.mimsave('./animation.gif', converted_images, duration=40)\n", + " return embed.embed_file('./animation.gif')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pIKTs-KneUfz" + }, + "outputs": [], + "source": [ + "#@title Get the kinetics-400 labels\n", + "# Get the kinetics-400 action labels from the GitHub repository.\n", + "KINETICS_URL = \"https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt\"\n", + "with request.urlopen(KINETICS_URL) as obj:\n", + " labels = [line.decode(\"utf-8\").strip() for line in obj.readlines()]\n", + "print(\"Found %d labels.\" % len(labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GBvmjVICIp3W" + }, + "source": [ + "# Using the UCF101 dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V-QcxdhLIfi2" + }, + "outputs": [], + "source": [ + "# Get the list of videos in the dataset.\n", + "ucf_videos = list_ucf_videos()\n", + " \n", + "categories = {}\n", + "for video in ucf_videos:\n", + " category = video[2:-12]\n", + " if category not in categories:\n", + " categories[category] = []\n", + " categories[category].append(video)\n", + "print(\"Found %d videos in %d categories.\" % (len(ucf_videos), len(categories)))\n", + "\n", + "for category, sequences in categories.items():\n", + " summary = \", \".join(sequences[:2])\n", + " print(\"%-20s %4d videos (%s, ...)\" % (category, len(sequences), summary))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c0ZvVDruN2nU" + }, + "outputs": [], + "source": [ + "# Get a sample cricket video.\n", + "video_path = fetch_ucf_video(\"v_CricketShot_g04_c02.avi\")\n", + "sample_video = load_video(video_path)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "id": "hASLA90YFPTO" + }, + "outputs": [], + "source": [ + "sample_video.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "POf5XgffvXlD" + }, + "outputs": [], + "source": [ + "i3d = hub.load(\"https://tfhub.dev/deepmind/i3d-kinetics-400/1\").signatures['default']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mDXgaOD1zhMP" + }, + "source": [ + "Run the id3 model and print the top-5 action predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3mTbqA5JGYUx" + }, + "outputs": [], + "source": [ + "def predict(sample_video):\n", + " # Add a batch axis to the sample video.\n", + " model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]\n", + "\n", + " logits = i3d(model_input)['default'][0]\n", + " probabilities = tf.nn.softmax(logits)\n", + "\n", + " print(\"Top 5 actions:\")\n", + " for i in np.argsort(probabilities)[::-1][:5]:\n", + " print(f\" {labels[i]:22}: {probabilities[i] * 100:5.2f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ykaXQcGRvK4E" + }, + "outputs": [], + "source": [ + "predict(sample_video)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PHsq0lHXCsD4" + }, + "source": [ + "Now try a new video, from: https://commons.wikimedia.org/wiki/Category:Videos_of_sports\n", + "\n", + "How about [this video](https://commons.wikimedia.org/wiki/File:End_of_a_jam.ogv) by Patrick Gillett: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p-mZ9fFPCoNq" + }, + "outputs": [], + "source": [ + "!curl -O https://upload.wikimedia.org/wikipedia/commons/8/86/End_of_a_jam.ogv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lpLmE8rjEbAF" + }, + "outputs": [], + "source": [ + "video_path = \"End_of_a_jam.ogv\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CHZJ9qTLErhV" + }, + "outputs": [], + "source": [ + "sample_video = load_video(video_path)[:100]\n", + "sample_video.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ZNLkEZ9Er-c" + }, + "outputs": [], + "source": [ + "to_gif(sample_video)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yskHIRbxEtjS" + }, + "outputs": [], + "source": [ + "predict(sample_video)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "x8Q7Un821X1A" + ], + "name": "action_recognition_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bangla_article_classifier.ipynb b/site/en/hub/tutorials/bangla_article_classifier.ipynb new file mode 100644 index 00000000000..988a68c4023 --- /dev/null +++ b/site/en/hub/tutorials/bangla_article_classifier.ipynb @@ -0,0 +1,646 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IDdZSPcLtKx4" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-g5By3P4tavy" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS, \n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vpaLrN0mteAS" + }, + "source": [ + "# Bangla Article Classification With TF-Hub" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GhN2WtIrBQ4y" + }, + "source": [ + "Caution: In addition to installing Python packages with pip, this notebook uses\n", + "`sudo apt install` to install system packages: `unzip`.\n", + "\n", + "This Colab is a demonstration of using [Tensorflow Hub](https://www.tensorflow.org/hub/) for text classification in non-English/local languages. Here we choose [Bangla](https://en.wikipedia.org/wiki/Bengali_language) as the local language and use pretrained word embeddings to solve a multiclass classification task where we classify Bangla news articles in 5 categories. The pretrained embeddings for Bangla comes from [fastText](https://fasttext.cc/docs/en/crawl-vectors.html) which is a library by Facebook with released pretrained word vectors for 157 languages. \n", + "\n", + "We'll use TF-Hub's pretrained embedding exporter for converting the word embeddings to a text embedding module first and then use the module to train a classifier with [tf.keras](https://www.tensorflow.org/api_docs/python/tf/keras), Tensorflow's high level user friendly API to build deep learning models. Even if we are using fastText embeddings here, it's possible to export any other embeddings pretrained from other tasks and quickly get results with Tensorflow hub. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9Vt-StAAZguA" + }, + "outputs": [], + "source": [ + "%%bash\n", + "# https://github.com/pypa/setuptools/issues/1694#issuecomment-466010982\n", + "pip install gdown --no-use-pep517" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WcBA19FlDPZO" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo apt-get install -y unzip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zSeyZMq-BYsu" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import gdown\n", + "import numpy as np\n", + "from sklearn.metrics import classification_report\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9FB7gLU4F54l" + }, + "source": [ + "# Dataset\n", + "\n", + "We will use [BARD](https://www.researchgate.net/publication/328214545_BARD_Bangla_Article_Classification_Using_a_New_Comprehensive_Dataset) (Bangla Article Dataset) which has around 376,226 articles collected from different Bangla news portals and labelled with 5 categories: economy, state, international, sports, and entertainment. 
We download the file from Google Drive this ([bit.ly/BARD_DATASET](https://bit.ly/BARD_DATASET)) link is referring to from [this](https://github.com/tanvirfahim15/BARD-Bangla-Article-Classifier) GitHub repository.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zdQrL_rwa-1K" + }, + "outputs": [], + "source": [ + "gdown.download(\n", + " url='https://drive.google.com/uc?id=1Ag0jd21oRwJhVFIBohmX_ogeojVtapLy',\n", + " output='bard.zip',\n", + " quiet=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P2YW4GGa9Y5o" + }, + "outputs": [], + "source": [ + "%%bash\n", + "unzip -qo bard.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "js75OARBF_B8" + }, + "source": [ + "# Export pretrained word vectors to TF-Hub module" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-uAicYA6vLsf" + }, + "source": [ + "TF-Hub provides some useful scripts for converting word embeddings to TF-hub text embedding modules [here](https://github.com/tensorflow/hub/tree/master/examples/text_embeddings_v2). To make the module for Bangla or any other languages, we simply have to download the word embedding `.txt` or `.vec` file to the same directory as `export_v2.py` and run the script.\n", + "\n", + "\n", + "The exporter reads the embedding vectors and exports it to a Tensorflow [SavedModel](https://www.tensorflow.org/beta/guide/saved_model). A SavedModel contains a complete TensorFlow program including weights and graph. TF-Hub can load the SavedModel as a [module](https://www.tensorflow.org/hub/api_docs/python/hub/Module), which we will use to build the model for text classification. Since we are using `tf.keras` to build the model, we will use [hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer), which provides a wrapper for a TF-Hub module to use as a Keras Layer.\n", + "\n", + "First we will get our word embeddings from fastText and embedding exporter from TF-Hub [repo](https://github.com/tensorflow/hub).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5DY5Ze6pO1G5" + }, + "outputs": [], + "source": [ + "%%bash\n", + "curl -O https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bn.300.vec.gz\n", + "curl -O https://raw.githubusercontent.com/tensorflow/hub/master/examples/text_embeddings_v2/export_v2.py\n", + "gunzip -qf cc.bn.300.vec.gz --k" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PAzdNZaHmdl1" + }, + "source": [ + "Then, we will run the exporter script on our embedding file. Since fastText embeddings have a header line and are pretty large (around 3.3 GB for Bangla after converting to a module) we ignore the first line and export only the first 100, 000 tokens to the text embedding module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Tkv5acr_Q9UU" + }, + "outputs": [], + "source": [ + "%%bash\n", + "python export_v2.py --embedding_file=cc.bn.300.vec --export_path=text_module --num_lines_to_ignore=1 --num_lines_to_use=100000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k9WEpmedF_3_" + }, + "outputs": [], + "source": [ + "module_path = \"text_module\"\n", + "embedding_layer = hub.KerasLayer(module_path, trainable=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fQHbmS_D4YIo" + }, + "source": [ + "The text embedding module takes a batch of sentences in a 1D tensor of strings as input and outputs the embedding vectors of shape (batch_size, embedding_dim) corresponding to the sentences. It preprocesses the input by splitting on spaces. Word embeddings are combined to sentence embeddings with the `sqrtn` combiner(See [here](https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup_sparse)). For demonstration we pass a list of Bangla words as input and get the corresponding embedding vectors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z1MBnaBUihWn" + }, + "outputs": [], + "source": [ + "embedding_layer(['বাস', 'বসবাস', 'ট্রেন', 'যাত্রী', 'ট্রাক']) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4KY8LiFOHmcd" + }, + "source": [ + "# Convert to Tensorflow Dataset \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pNguCDNe6bvz" + }, + "source": [ + "Since the dataset is really large instead of loading the entire dataset in memory we will use a generator to yield samples in run-time in batches using [Tensorflow Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) functions. The dataset is also very imbalanced, so, before using the generator, we will shuffle the dataset. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bYv6LqlEChO1" + }, + "outputs": [], + "source": [ + "dir_names = ['economy', 'sports', 'entertainment', 'state', 'international']\n", + "\n", + "file_paths = []\n", + "labels = []\n", + "for i, dir in enumerate(dir_names):\n", + " file_names = [\"/\".join([dir, name]) for name in os.listdir(dir)]\n", + " file_paths += file_names\n", + " labels += [i] * len(os.listdir(dir))\n", + " \n", + "np.random.seed(42)\n", + "permutation = np.random.permutation(len(file_paths))\n", + "\n", + "file_paths = np.array(file_paths)[permutation]\n", + "labels = np.array(labels)[permutation]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8b-UtAP5TL-W" + }, + "source": [ + "We can check the distribution of labels in the training and validation examples after shuffling." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mimhWVSzzAmS" + }, + "outputs": [], + "source": [ + "train_frac = 0.8\n", + "train_size = int(len(file_paths) * train_frac)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4BNXFrkotAYu" + }, + "outputs": [], + "source": [ + "# plot training vs validation distribution\n", + "plt.subplot(1, 2, 1)\n", + "plt.hist(labels[0:train_size])\n", + "plt.title(\"Train labels\")\n", + "plt.subplot(1, 2, 2)\n", + "plt.hist(labels[train_size:])\n", + "plt.title(\"Validation labels\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RVbHb2I3TUNA" + }, + "source": [ + "To create a [Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) using a generator, we first write a generator function which reads each of the articles from `file_paths` and the labels from the label array, and yields one training example at each step. We pass this generator function to the [`tf.data.Dataset.from_generator`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator) method and specify the output types. Each training example is a tuple containing an article of `tf.string` data type and one-hot encoded label. We split the dataset with a train-validation split of 80-20 using [`tf.data.Dataset.skip`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#skip) and [`tf.data.Dataset.take`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#take) methods." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eZRGTzEhUi7Q" + }, + "outputs": [], + "source": [ + "def load_file(path, label):\n", + " return tf.io.read_file(path), label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2g4nRflB7fbF" + }, + "outputs": [], + "source": [ + "def make_datasets(train_size):\n", + " batch_size = 256\n", + "\n", + " train_files = file_paths[:train_size]\n", + " train_labels = labels[:train_size]\n", + " train_ds = tf.data.Dataset.from_tensor_slices((train_files, train_labels))\n", + " train_ds = train_ds.map(load_file).shuffle(5000)\n", + " train_ds = train_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)\n", + "\n", + " test_files = file_paths[train_size:]\n", + " test_labels = labels[train_size:]\n", + " test_ds = tf.data.Dataset.from_tensor_slices((test_files, test_labels))\n", + " test_ds = test_ds.map(load_file)\n", + " test_ds = test_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)\n", + "\n", + "\n", + " return train_ds, test_ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8PuuN6el8tv9" + }, + "outputs": [], + "source": [ + "train_data, validation_data = make_datasets(train_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MrdZI6FqPJNP" + }, + "source": [ + "# Model Training and Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jgr7YScGVS58" + }, + "source": [ + "Since we have already added a wrapper around our module to use it as any other layer in Keras, we can create a small [Sequential](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential) model which is a linear stack of layers. We can add our text embedding module with `model.add` just like any other layer. We compile the model by specifying the loss and optimizer and train it for 10 epochs. 
The `tf.keras` API can handle Tensorflow Datasets as input, so we can pass a Dataset instance to the fit method for model training. Since we are using the generator function, `tf.data` will handle generating the samples, batching them and feeding them to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WhCqbDK2uUV5" + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nHUw807XPPM9" + }, + "outputs": [], + "source": [ + "def create_model():\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Input(shape=[], dtype=tf.string),\n", + " embedding_layer,\n", + " tf.keras.layers.Dense(64, activation=\"relu\"),\n", + " tf.keras.layers.Dense(16, activation=\"relu\"),\n", + " tf.keras.layers.Dense(5),\n", + " ])\n", + " model.compile(loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=\"adam\", metrics=['accuracy'])\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5J4EXJUmPVNG" + }, + "outputs": [], + "source": [ + "model = create_model()\n", + "# Create earlystopping callback\n", + "early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZZ7XJLg2u2No" + }, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OoBkN2tAaXWD" + }, + "outputs": [], + "source": [ + "history = model.fit(train_data, \n", + " validation_data=validation_data, \n", + " epochs=5, \n", + " callbacks=[early_stopping_callback])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XoDk8otmMoT7" + }, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G5ZRKGOsXEh4" + }, + "source": [ + "We can visualize the accuracy and loss curves for training and validation data using the `tf.keras.callbacks.History` object returned by the `tf.keras.Model.fit` method, which contains the loss and accuracy value for each epoch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V6tOnByIOeGn" + }, + "outputs": [], + "source": [ + "# Plot training & validation accuracy values\n", + "plt.plot(history.history['accuracy'])\n", + "plt.plot(history.history['val_accuracy'])\n", + "plt.title('Model accuracy')\n", + "plt.ylabel('Accuracy')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['Train', 'Test'], loc='upper left')\n", + "plt.show()\n", + "\n", + "# Plot training & validation loss values\n", + "plt.plot(history.history['loss'])\n", + "plt.plot(history.history['val_loss'])\n", + "plt.title('Model loss')\n", + "plt.ylabel('Loss')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['Train', 'Test'], loc='upper left')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D54IXLqcG8Cq" + }, + "source": [ + "## Prediction\n", + "\n", + "We can get the predictions for the validation data and check the confusion matrix to see the model's performance for each of the 5 classes. Because `tf.keras.Model.predict` method returns an n-d array for probabilities for each class, they can be converted to class labels using `np.argmax`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dptEywzZJk4l" + }, + "outputs": [], + "source": [ + "y_pred = model.predict(validation_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7Dzeml6Pk0ub" + }, + "outputs": [], + "source": [ + "y_pred = np.argmax(y_pred, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T4M3Lzg8jHcB" + }, + "outputs": [], + "source": [ + "samples = file_paths[0:3]\n", + "for i, sample in enumerate(samples):\n", + " f = open(sample)\n", + " text = f.read()\n", + " print(text[0:100])\n", + " print(\"True Class: \", sample.split(\"/\")[0])\n", + " print(\"Predicted Class: \", dir_names[y_pred[i]])\n", + " f.close()\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PlDTIpMBu6h-" + }, + "source": [ + "## Compare Performance\n", + "\n", + "Now we can take the correct labels for the validation data from `labels` and compare them with our predictions to get a [classification_report](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mqrERUCS1Xn7" + }, + "outputs": [], + "source": [ + "y_true = np.array(labels[train_size:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NX5w-NuTKuVP" + }, + "outputs": [], + "source": [ + "print(classification_report(y_true, y_pred, target_names=dir_names))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5e9m3bV6oXK" + }, + "source": [ + "We can also compare our model's performance with the published results obtained in the original [paper](https://www.researchgate.net/publication/328214545_BARD_Bangla_Article_Classification_Using_a_New_Comprehensive_Dataset), which had a 0.96 precision .The original authors described many preprocessing steps performed on the dataset, such as dropping punctuations and digits, removing top 25 most frequest stop words. As we can see in the `classification_report`, we also manage to obtain a 0.96 precision and accuracy after training for only 5 epochs without any preprocessing! \n", + "\n", + "In this example, when we created the Keras layer from our embedding module, we set the parameter`trainable=False`, which means the embedding weights will not be updated during training. Try setting it to `True` to reach around 97% accuracy using this dataset after only 2 epochs. " + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "IDdZSPcLtKx4" + ], + "name": "bangla_article_classifier.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bert_experts.ipynb b/site/en/hub/tutorials/bert_experts.ipynb new file mode 100644 index 00000000000..5440909f7cb --- /dev/null +++ b/site/en/hub/tutorials/bert_experts.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "-1vOMEXIhMQt" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pRfq9ZU5hQhg" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mTL0TERThT6z" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FkthMlVk8bHp" + }, + "source": [ + "# BERT Experts from TF-Hub\n", + "\n", + "This colab demonstrates how to:\n", + "* Load BERT models from [TensorFlow Hub](https://tfhub.dev) that have been trained on different tasks including MNLI, SQuAD, and PubMed\n", + "* Use a matching preprocessing model to tokenize raw text and convert it to ids\n", + "* Generate the pooled and sequence output from the token input ids using the loaded model\n", + "* Look at the semantic similarity of the pooled outputs of different sentences\n", + "\n", + "#### Note: This colab should be run with a GPU runtime" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jspO02jDPfPG" + }, + "source": [ + "## Set up and imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r-ed8zj-dbwm" + }, + "outputs": [], + "source": [ + "!pip install --quiet \"tensorflow-text==2.11.*\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "czDmtrGKYw_5" + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "from sklearn.metrics import pairwise\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text as text # Imports TF ops for preprocessing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GSuDcPSaY5aB" + }, + "outputs": [], + "source": [ + "#@title Configure the model { run: \"auto\" }\n", + "BERT_MODEL = \"https://tfhub.dev/google/experts/bert/wiki_books/2\" # @param {type: \"string\"} [\"https://tfhub.dev/google/experts/bert/wiki_books/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/mnli/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/qnli/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/qqp/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/squad2/2\", \"https://tfhub.dev/google/experts/bert/wiki_books/sst2/2\", \"https://tfhub.dev/google/experts/bert/pubmed/2\", \"https://tfhub.dev/google/experts/bert/pubmed/squad2/2\"]\n", + "# Preprocessing must match the model, but all the above use the same.\n", + "PREPROCESS_MODEL = \"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pvaZiGVgwtqw" + }, + "source": [ + "## Sentences\n", + "\n", + "Let's take some sentences from Wikipedia to run through the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tytu-rSpeDNG" + }, + "outputs": [], + "source": [ + "sentences = [\n", + " \"Here We Go Then, You And I is a 1999 album by Norwegian pop artist Morten Abel. 
It was Abel's second CD as a solo artist.\",\n", + " \"The album went straight to number one on the Norwegian album chart, and sold to double platinum.\",\n", + " \"Among the singles released from the album were the songs \\\"Be My Lover\\\" and \\\"Hard To Stay Awake\\\".\",\n", + " \"Riccardo Zegna is an Italian jazz musician.\",\n", + " \"Rajko Maksimović is a composer, writer, and music pedagogue.\",\n", + " \"One of the most significant Serbian composers of our time, Maksimović has been and remains active in creating works for different ensembles.\",\n", + " \"Ceylon spinach is a common name for several plants and may refer to: Basella alba Talinum fruticosum\",\n", + " \"A solar eclipse occurs when the Moon passes between Earth and the Sun, thereby totally or partly obscuring the image of the Sun for a viewer on Earth.\",\n", + " \"A partial solar eclipse occurs in the polar regions of the Earth when the center of the Moon's shadow misses the Earth.\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zI39475kxCKh" + }, + "source": [ + "## Run the model\n", + "\n", + "We'll load the BERT model from TF-Hub, tokenize our sentences using the matching preprocessing model from TF-Hub, then feed in the tokenized sentences to the model. To keep this colab fast and simple, we recommend running on GPU.\n", + "\n", + "Go to **Runtime** → **Change runtime type** to make sure that **GPU** is selected" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x4t6r22ErQg0" + }, + "outputs": [], + "source": [ + "preprocess = hub.load(PREPROCESS_MODEL)\n", + "bert = hub.load(BERT_MODEL)\n", + "inputs = preprocess(sentences)\n", + "outputs = bert(inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gItjCg4315Cv" + }, + "outputs": [], + "source": [ + "print(\"Sentences:\")\n", + "print(sentences)\n", + "\n", + "print(\"\\nBERT inputs:\")\n", + "print(inputs)\n", + "\n", + "print(\"\\nPooled embeddings:\")\n", + "print(outputs[\"pooled_output\"])\n", + "\n", + "print(\"\\nPer token embeddings:\")\n", + "print(outputs[\"sequence_output\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ptiW2mgw6x-l" + }, + "source": [ + "## Semantic similarity\n", + "\n", + "Now let's take a look at the `pooled_output` embeddings of our sentences and compare how similar they are across sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GXrSO2Vc1Qtr" + }, + "outputs": [], + "source": [ + "#@title Helper functions\n", + "\n", + "def plot_similarity(features, labels):\n", + " \"\"\"Plot a similarity matrix of the embeddings.\"\"\"\n", + " cos_sim = pairwise.cosine_similarity(features)\n", + " sns.set(font_scale=1.2)\n", + " cbar_kws=dict(use_gridspec=False, location=\"left\")\n", + " g = sns.heatmap(\n", + " cos_sim, xticklabels=labels, yticklabels=labels,\n", + " vmin=0, vmax=1, cmap=\"Blues\", cbar_kws=cbar_kws)\n", + " g.tick_params(labelright=True, labelleft=False)\n", + " g.set_yticklabels(labels, rotation=0)\n", + " g.set_title(\"Semantic Textual Similarity\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "td6jcT0pJMZ5" + }, + "outputs": [], + "source": [ + "plot_similarity(outputs[\"pooled_output\"], sentences)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tJ4QCyzhSL7B" + }, + "source": [ + "## Learn more\n", + "\n", + "* Find more BERT models on [TensorFlow Hub](https://tfhub.dev)\n", + "* This notebook demonstrates simple inference with BERT, you can find a more advanced tutorial about fine-tuning BERT at [tensorflow.org/official_models/fine_tuning_bert](https://www.tensorflow.org/official_models/fine_tuning_bert)\n", + "* We used just one GPU chip to run the model, you can learn more about how to load models using tf.distribute at [tensorflow.org/tutorials/distribute/save_and_load](https://www.tensorflow.org/tutorials/distribute/save_and_load)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "bert_experts.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb b/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb new file mode 100644 index 00000000000..919abc7e354 --- /dev/null +++ b/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb @@ -0,0 +1,713 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pLOYL1PJAAtK" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3fJWQ8WSAFhh" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1NTVIH6ABK-" + }, + "source": [ + "# Generating Images with BigBiGAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AVvOoEhswyZg" + }, + "source": [ + "This notebook is a demo for the *BigBiGAN* models available on [TF Hub](https://tfhub.dev/s?publisher=deepmind&q=bigbigan).\n", + "\n", + "BigBiGAN extends standard (Big)GANs by adding an *encoder* module which can be used for unsupervised representation learning. Roughly speaking, the encoder inverts the generator by predicting latents `z` given real data `x`. See the [BigBiGAN paper on arXiv](https://arxiv.org/abs/1907.02544) [1] for more information about these models.\n", + "\n", + "After connecting to a runtime, get started by following these instructions:\n", + "\n", + "1. (Optional) Update the selected **`module_path`** in the first code cell below to load a BigBiGAN generator for a different encoder architecture.\n", + "2. Click **Runtime > Run all** to run each cell in order. Afterwards, the outputs, including visualizations of BigBiGAN samples and reconstructions, should automatically appear below.\n", + "\n", + "Note: if you run into any issues, it can help to click **Runtime > Restart and run all...** to restart your runtime and rerun all cells from scratch.\n", + "\n", + "[1] Jeff Donahue and Karen Simonyan. [Large Scale Adversarial Representation Learning](https://arxiv.org/abs/1907.02544). *arxiv:1907.02544*, 2019." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DtGFwUKOA9jt" + }, + "source": [ + "First, set the module path.\n", + "By default, we load the BigBiGAN model with the smaller ResNet-50-based encoder from **`https://tfhub.dev/deepmind/bigbigan-resnet50/1`**.\n", + "To load the larger RevNet-50-x4 based model used to achieve the best representation learning results, comment out the active **`module_path`** setting and uncomment the other." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xoY9pl0FBoUS" + }, + "outputs": [], + "source": [ + "module_path = 'https://tfhub.dev/deepmind/bigbigan-resnet50/1' # ResNet-50\n", + "# module_path = 'https://tfhub.dev/deepmind/bigbigan-revnet50x4/1' # RevNet-50 x4" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lr01cszC_vcC" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TPdT-hYj1XXQ" + }, + "outputs": [], + "source": [ + "import io\n", + "import IPython.display\n", + "import PIL.Image\n", + "from pprint import pformat\n", + "\n", + "import numpy as np\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ouePZy6-CFJl" + }, + "source": [ + "## Define some functions to display images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MBQPtmrY2N91" + }, + "outputs": [], + "source": [ + "def imgrid(imarray, cols=4, pad=1, padval=255, row_major=True):\n", + " \"\"\"Lays out a [N, H, W, C] image array as a single image grid.\"\"\"\n", + " pad = int(pad)\n", + " if pad < 0:\n", + " raise ValueError('pad must be non-negative')\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = N // cols + int(N % cols != 0)\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant', constant_values=padval)\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " if pad:\n", + " grid = grid[:-pad, :-pad]\n", + " return grid\n", + "\n", + "def interleave(*args):\n", + " \"\"\"Interleaves input arrays of the same shape along the batch axis.\"\"\"\n", + " if not args:\n", + " raise ValueError('At least one argument is required.')\n", + " a0 = args[0]\n", + " if any(a.shape != a0.shape for a in args):\n", + " raise ValueError('All inputs must have the same shape.')\n", + " if not a0.shape:\n", + " raise ValueError('Inputs must have at least one axis.')\n", + " out = np.transpose(args, [1, 0] + list(range(2, len(a0.shape) + 1)))\n", + " out = out.reshape(-1, *a0.shape[1:])\n", + " return out\n", + "\n", + "def imshow(a, format='png', jpeg_fallback=True):\n", + " \"\"\"Displays an image in the given format.\"\"\"\n", + " a = a.astype(np.uint8)\n", + " data = io.BytesIO()\n", + " PIL.Image.fromarray(a).save(data, format)\n", + " im_data = data.getvalue()\n", + " try:\n", + " disp = IPython.display.display(IPython.display.Image(im_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print ('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format)\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp\n", + "\n", + "def image_to_uint8(x):\n", + " \"\"\"Converts [-1, 1] float array to [0, 255] uint8.\"\"\"\n", + " x = np.asarray(x)\n", + " x = (256. / 2.) 
* (x + 1.)\n", + " x = np.clip(x, 0, 255)\n", + " x = x.astype(np.uint8)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8ASXPMb6CaXR" + }, + "source": [ + "## Load a BigBiGAN TF Hub module and display its available functionality" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IuG7G1ToCtaf" + }, + "outputs": [], + "source": [ + "# module = hub.Module(module_path, trainable=True, tags={'train'}) # training\n", + "module = hub.Module(module_path) # inference\n", + "\n", + "for signature in module.get_signature_names():\n", + " print('Signature:', signature)\n", + " print('Inputs:', pformat(module.get_input_info_dict(signature)))\n", + " print('Outputs:', pformat(module.get_output_info_dict(signature)))\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sAY-AmcNCj9_" + }, + "source": [ + "## Define a wrapper class for convenient access to various functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aTKHkxfx1dAL" + }, + "outputs": [], + "source": [ + "class BigBiGAN(object):\n", + "\n", + " def __init__(self, module):\n", + " \"\"\"Initialize a BigBiGAN from the given TF Hub module.\"\"\"\n", + " self._module = module\n", + "\n", + " def generate(self, z, upsample=False):\n", + " \"\"\"Run a batch of latents z through the generator to generate images.\n", + "\n", + " Args:\n", + " z: A batch of 120D Gaussian latents, shape [N, 120].\n", + "\n", + " Returns: a batch of generated RGB images, shape [N, 128, 128, 3], range\n", + " [-1, 1].\n", + " \"\"\"\n", + " outputs = self._module(z, signature='generate', as_dict=True)\n", + " return outputs['upsampled' if upsample else 'default']\n", + "\n", + " def make_generator_ph(self):\n", + " \"\"\"Creates a tf.placeholder with the dtype & shape of generator inputs.\"\"\"\n", + " info = self._module.get_input_info_dict('generate')['z']\n", + " return tf.placeholder(dtype=info.dtype, shape=info.get_shape())\n", + "\n", + " def gen_pairs_for_disc(self, z):\n", + " \"\"\"Compute generator input pairs (G(z), z) for discriminator, given z.\n", + "\n", + " Args:\n", + " z: A batch of latents (120D standard Gaussians), shape [N, 120].\n", + "\n", + " Returns: a tuple (G(z), z) of discriminator inputs.\n", + " \"\"\"\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " x = self.generate(z)\n", + " return x, z\n", + "\n", + " def encode(self, x, return_all_features=False):\n", + " \"\"\"Run a batch of images x through the encoder.\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " return_all_features: If True, return all features computed by the encoder.\n", + " Otherwise (default) just return a sample z_hat.\n", + "\n", + " Returns: the sample z_hat of shape [N, 120] (or a dict of all features if\n", + " return_all_features).\n", + " \"\"\"\n", + " outputs = self._module(x, signature='encode', as_dict=True)\n", + " return outputs if return_all_features else outputs['z_sample']\n", + "\n", + " def make_encoder_ph(self):\n", + " \"\"\"Creates a tf.placeholder with the dtype & shape of encoder inputs.\"\"\"\n", + " info = self._module.get_input_info_dict('encode')['x']\n", + " return tf.placeholder(dtype=info.dtype, shape=info.get_shape())\n", + "\n", + " def enc_pairs_for_disc(self, x):\n", + " \"\"\"Compute encoder input pairs (x, E(x)) for discriminator, given x.\n", + "\n", + " Args:\n", + " x: A batch of data 
(256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + "\n", + " Returns: a tuple (downsample(x), E(x)) of discriminator inputs.\n", + " \"\"\"\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " x_down = tf.nn.avg_pool(x, ksize=2, strides=2, padding='SAME')\n", + " z = self.encode(x)\n", + " return x_down, z\n", + "\n", + " def discriminate(self, x, z):\n", + " \"\"\"Compute the discriminator scores for pairs of data (x, z).\n", + "\n", + " (x, z) must be batches with the same leading batch dimension, and joint\n", + " scores are computed on corresponding pairs x[i] and z[i].\n", + "\n", + " Args:\n", + " x: A batch of data (128x128 RGB images), shape [N, 128, 128, 3], range\n", + " [-1, 1].\n", + " z: A batch of latents (120D standard Gaussians), shape [N, 120].\n", + "\n", + " Returns:\n", + " A dict of scores:\n", + " score_xz: the joint scores for the (x, z) pairs.\n", + " score_x: the unary scores for x only.\n", + " score_z: the unary scores for z only.\n", + " \"\"\"\n", + " inputs = dict(x=x, z=z)\n", + " return self._module(inputs, signature='discriminate', as_dict=True)\n", + "\n", + " def reconstruct_x(self, x, use_sample=True, upsample=False):\n", + " \"\"\"Compute BigBiGAN reconstructions of images x via G(E(x)).\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " use_sample: takes a sample z_hat ~ E(x). Otherwise, deterministically\n", + " use the mean. (Though a sample z_hat may be far from the mean z,\n", + " typically the resulting recons G(z_hat) and G(z) are very\n", + " similar.\n", + " upsample: if set, upsample the reconstruction to the input resolution\n", + " (256x256). Otherwise return the raw lower resolution generator output\n", + " (128x128).\n", + "\n", + " Returns: a batch of recons G(E(x)), shape [N, 256, 256, 3] if\n", + " `upsample`, otherwise [N, 128, 128, 3].\n", + " \"\"\"\n", + " if use_sample:\n", + " z = self.encode(x)\n", + " else:\n", + " z = self.encode(x, return_all_features=True)['z_mean']\n", + " recons = self.generate(z, upsample=upsample)\n", + " return recons\n", + "\n", + " def losses(self, x, z):\n", + " \"\"\"Compute per-module BigBiGAN losses given data & latent sample batches.\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " z: A batch of latents (120D standard Gaussians), shape [M, 120].\n", + "\n", + " For the original BigBiGAN losses, pass batches of size N=M=2048, with z's\n", + " sampled from a 120D standard Gaussian (e.g., np.random.randn(2048, 120)),\n", + " and x's sampled from the ImageNet (ILSVRC2012) training set with the\n", + " \"ResNet-style\" preprocessing from:\n", + "\n", + " https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_preprocessing.py\n", + "\n", + " Returns:\n", + " A dict of per-module losses:\n", + " disc: loss for the discriminator.\n", + " enc: loss for the encoder.\n", + " gen: loss for the generator.\n", + " \"\"\"\n", + " # Compute discriminator scores on (x, E(x)) pairs.\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " scores_enc_x_dict = self.discriminate(*self.enc_pairs_for_disc(x))\n", + " scores_enc_x = tf.concat([scores_enc_x_dict['score_xz'],\n", + " scores_enc_x_dict['score_x'],\n", + " scores_enc_x_dict['score_z']], axis=0)\n", + "\n", + " # Compute discriminator scores on (G(z), z) pairs.\n", + " scores_gen_z_dict = 
self.discriminate(*self.gen_pairs_for_disc(z))\n", + " scores_gen_z = tf.concat([scores_gen_z_dict['score_xz'],\n", + " scores_gen_z_dict['score_x'],\n", + " scores_gen_z_dict['score_z']], axis=0)\n", + "\n", + " disc_loss_enc_x = tf.reduce_mean(tf.nn.relu(1. - scores_enc_x))\n", + " disc_loss_gen_z = tf.reduce_mean(tf.nn.relu(1. + scores_gen_z))\n", + " disc_loss = disc_loss_enc_x + disc_loss_gen_z\n", + "\n", + " enc_loss = tf.reduce_mean(scores_enc_x)\n", + " gen_loss = tf.reduce_mean(-scores_gen_z)\n", + "\n", + " return dict(disc=disc_loss, enc=enc_loss, gen=gen_loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5L5SFfH4C9gu" + }, + "source": [ + "## Create tensors to be used later for computing samples, reconstructions, discriminator scores, and losses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "goxtzcb-19NA" + }, + "outputs": [], + "source": [ + "bigbigan = BigBiGAN(module)\n", + "\n", + "# Make input placeholders for x (`enc_ph`) and z (`gen_ph`).\n", + "enc_ph = bigbigan.make_encoder_ph()\n", + "gen_ph = bigbigan.make_generator_ph()\n", + "\n", + "# Compute samples G(z) from encoder input z (`gen_ph`).\n", + "gen_samples = bigbigan.generate(gen_ph)\n", + "\n", + "# Compute reconstructions G(E(x)) of encoder input x (`enc_ph`).\n", + "recon_x = bigbigan.reconstruct_x(enc_ph, upsample=True)\n", + "\n", + "# Compute encoder features used for representation learning evaluations given\n", + "# encoder input x (`enc_ph`).\n", + "enc_features = bigbigan.encode(enc_ph, return_all_features=True)\n", + "\n", + "# Compute discriminator scores for encoder pairs (x, E(x)) given x (`enc_ph`)\n", + "# and generator pairs (G(z), z) given z (`gen_ph`).\n", + "disc_scores_enc = bigbigan.discriminate(*bigbigan.enc_pairs_for_disc(enc_ph))\n", + "disc_scores_gen = bigbigan.discriminate(*bigbigan.gen_pairs_for_disc(gen_ph))\n", + "\n", + "# Compute losses.\n", + "losses = bigbigan.losses(enc_ph, gen_ph)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ly7LWnSUDQ_P" + }, + "source": [ + "## Create a TensorFlow session and initialize variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CPnzCHDWFJwx" + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()\n", + "sess = tf.Session()\n", + "sess.run(init)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gcEVS26D-ues" + }, + "source": [ + "# Generator samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LYSA8Zvb-w7S" + }, + "source": [ + "First, we'll visualize samples from the pretrained BigBiGAN generator by sampling generator inputs `z` from a standard Gaussian (via `np.random.randn`) and displaying the images it produces. So far we're not going beyond the capabilites of a standard GAN -- we're just using the generator (and ignoring the encoder) for now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9zfpvw8fGNMr" + }, + "outputs": [], + "source": [ + "feed_dict = {gen_ph: np.random.randn(32, 120)}\n", + "_out_samples = sess.run(gen_samples, feed_dict=feed_dict)\n", + "print('samples shape:', _out_samples.shape)\n", + "imshow(imgrid(image_to_uint8(_out_samples), cols=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9v58CTfl8jTc" + }, + "source": [ + "# Load `test_images` from the TF-Flowers dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o0kmzQ4EqKJt" + }, + "source": [ + "BigBiGAN is trained on ImageNet, but as it's too large to work with in this demo, we use the smaller TF-Flowers [1] dataset as our inputs for visualizing reconstructions and computing encoder features.\n", + "\n", + "In this cell we load TF-Flowers (downloading the dataset if needed) and store a fixed batch of 256x256 RGB image samples in a NumPy array `test_images`.\n", + "\n", + "[1] https://www.tensorflow.org/datasets/catalog/tf_flowers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OBgpkMdkUjL-" + }, + "outputs": [], + "source": [ + "def get_flowers_data():\n", + " \"\"\"Returns a [32, 256, 256, 3] np.array of preprocessed TF-Flowers samples.\"\"\"\n", + " import tensorflow_datasets as tfds\n", + " ds, info = tfds.load('tf_flowers', split='train', with_info=True)\n", + "\n", + " # Just get the images themselves as we don't need labels for this demo.\n", + " ds = ds.map(lambda x: x['image'])\n", + "\n", + " # Filter out small images (with minor edge length <256).\n", + " ds = ds.filter(lambda x: tf.reduce_min(tf.shape(x)[:2]) >= 256)\n", + "\n", + " # Take the center square crop of the image and resize to 256x256.\n", + " def crop_and_resize(image):\n", + " imsize = tf.shape(image)[:2]\n", + " minor_edge = tf.reduce_min(imsize)\n", + " start = (imsize - minor_edge) // 2\n", + " stop = start + minor_edge\n", + " cropped_image = image[start[0] : stop[0], start[1] : stop[1]]\n", + " resized_image = tf.image.resize_bicubic([cropped_image], [256, 256])[0]\n", + " return resized_image\n", + " ds = ds.map(crop_and_resize)\n", + "\n", + " # Convert images from [0, 255] uint8 to [-1, 1] float32.\n", + " ds = ds.map(lambda image: tf.cast(image, tf.float32) / (255. / 2.) - 1)\n", + "\n", + " # Take the first 32 samples.\n", + " ds = ds.take(32)\n", + "\n", + " return np.array(list(tfds.as_numpy(ds)))\n", + "\n", + "test_images = get_flowers_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QAFJQU597n2A" + }, + "source": [ + "# Reconstructions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmCQ9N9b7ptM" + }, + "source": [ + "Now we visualize BigBiGAN reconstructions by passing real images through the encoder and back through the generator, computing `G(E(x))` given images `x`.\n", + "Below, input images `x` are shown in the left column, and corresponding reconstructions are shown on the right.\n", + "\n", + "Note that reconstructions are not pixel-perfect matches to the input images; rather, they tend to capture the higher level semantic content of the input while \"forgetting\" most of the low-level detail. 
This suggests the BigBiGAN encoder may learn to capture the types of high level semantic information about images that we'd like to see in a representation learning approach.\n", + "\n", + "Also note that the raw reconstructions of the 256x256 input images are at the lower resolution produced by our generator -- 128x128. We upsample them for visualization purposes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R2F3eq8aFRle" + }, + "outputs": [], + "source": [ + "test_images_batch = test_images[:16]\n", + "_out_recons = sess.run(recon_x, feed_dict={enc_ph: test_images_batch})\n", + "print('reconstructions shape:', _out_recons.shape)\n", + "\n", + "inputs_and_recons = interleave(test_images_batch, _out_recons)\n", + "print('inputs_and_recons shape:', inputs_and_recons.shape)\n", + "imshow(imgrid(image_to_uint8(inputs_and_recons), cols=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zPpW3qdbEpXL" + }, + "source": [ + "# Encoder features" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2gAW76YxEsZa" + }, + "source": [ + "We now demonstrate how to compute features from the encoder used for standard representation learning evaluations.\n", + "\n", + "These features could be used in a linear or nearest neighbors-based classifier. We include the standard feature taken after the global average pooling (key `avepool_feat`) as well as the larger \"BN+CReLU\" feature (key `bn_crelu_feat`) used to achieve the best results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hpZYe5S_FQEw" + }, + "outputs": [], + "source": [ + "_out_features = sess.run(enc_features, feed_dict={enc_ph: test_images_batch})\n", + "print('AvePool features shape:', _out_features['avepool_feat'].shape)\n", + "print('BN+CReLU features shape:', _out_features['bn_crelu_feat'].shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TGzahsms2w9a" + }, + "source": [ + "# Discriminator scores and losses" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B2_5BIBN21Hr" + }, + "source": [ + "Finally, we'll compute the discriminator scores and losses on batches of encoder and generator pairs. These losses could be passed into an optimizer to train BigBiGAN.\n", + "\n", + "We use our batch of images above as the encoder inputs `x`, computing the encoder score as `D(x, E(x))`. For the generator inputs we sample `z` from a 120D standard Gaussian via `np.random.randn`, computing the generator score as `D(G(z), z)`.\n", + "\n", + "The discriminator predicts a joint score `score_xz` for the `(x, z)` pairs as well as unary scores `score_x` and `score_z` for `x` and `z` alone, respectively. It's trained to give high (positive) scores to encoder pairs and low (negative) scores to generator pairs. This mostly holds below, though the unary `score_z` is negative in both cases, indicating that the encoder outputs `E(x)` resemble actual samples from a Gaussian." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8JJ8Go0dr22-" + }, + "outputs": [], + "source": [ + "feed_dict = {enc_ph: test_images, gen_ph: np.random.randn(32, 120)}\n", + "_out_scores_enc, _out_scores_gen, _out_losses = sess.run(\n", + " [disc_scores_enc, disc_scores_gen, losses], feed_dict=feed_dict)\n", + "print('Encoder scores:', {k: v.mean() for k, v in _out_scores_enc.items()})\n", + "print('Generator scores:', {k: v.mean() for k, v in _out_scores_gen.items()})\n", + "print('Losses:', _out_losses)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "9v58CTfl8jTc" + ], + "name": "bigbigan_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb b/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb new file mode 100644 index 00000000000..e388f91fbcc --- /dev/null +++ b/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pLOYL1PJAAtK" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3fJWQ8WSAFhh" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cd1dhL4Ykbm7" + }, + "source": [ + "# Generating Images with BigGAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1NTVIH6ABK-" + }, + "source": [ + "This notebook is a demo for the *BigGAN* image generators available on [TF Hub](https://tfhub.dev/s?publisher=deepmind&q=biggan).\n", + "\n", + "See the [BigGAN paper on arXiv](https://arxiv.org/abs/1809.11096) [1] for more information about these models.\n", + "\n", + "After connecting to a runtime, get started by following these instructions:\n", + "\n", + "1. (Optional) Update the selected **`module_path`** in the first code cell below to load a BigGAN generator for a different image resolution.\n", + "2. Click **Runtime > Run all** to run each cell in order.\n", + " * Afterwards, the interactive visualizations should update automatically when you modify the settings using the sliders and dropdown menus.\n", + " * If not, press the **Play** button by the cell to re-render outputs manually.\n", + "\n", + "Note: if you run into any issues, it can help to click **Runtime > Restart and run all...** to restart your runtime and rerun all cells from scratch.\n", + "\n", + "[1] Andrew Brock, Jeff Donahue, and Karen Simonyan. [Large Scale GAN Training for High Fidelity Natural Image Synthesis](https://arxiv.org/abs/1809.11096). *arxiv:1809.11096*, 2018." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XS1_N6hKj8cz" + }, + "source": [ + "First, set the module path.\n", + "By default, we load the BigGAN-deep generator for 256x256 images from **`https://tfhub.dev/deepmind/biggan-deep-256/1`**.\n", + "To generate 128x128 or 512x512 images or to use the original BigGAN generators, comment out the active **`module_path`** setting and uncomment one of the others." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OJCIhQPClKJ1" + }, + "outputs": [], + "source": [ + "# BigGAN-deep models\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-deep-128/1' # 128x128 BigGAN-deep\n", + "module_path = 'https://tfhub.dev/deepmind/biggan-deep-256/1' # 256x256 BigGAN-deep\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-deep-512/1' # 512x512 BigGAN-deep\n", + "\n", + "# BigGAN (original) models\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-128/2' # 128x128 BigGAN\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-256/2' # 256x256 BigGAN\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-512/2' # 512x512 BigGAN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JJrTM6hAi0CJ" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lOZnst2jeWDL" + }, + "outputs": [], + "source": [ + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import os\n", + "import io\n", + "import IPython.display\n", + "import numpy as np\n", + "import PIL.Image\n", + "from scipy.stats import truncnorm\n", + "import tensorflow_hub as hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stWb21nlcyCm" + }, + "source": [ + "## Load a BigGAN generator module from TF Hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tVgwgJiCH3PV" + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "print('Loading BigGAN module from:', module_path)\n", + "module = hub.Module(module_path)\n", + "inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in module.get_input_info_dict().items()}\n", + "output = module(inputs)\n", + "\n", + "print()\n", + 
"print('Inputs:\\n', '\\n'.join(\n", + " ' {}: {}'.format(*kv) for kv in inputs.items()))\n", + "print()\n", + "print('Output:', output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ry62-8SWfuds" + }, + "source": [ + "## Define some functions for sampling and displaying BigGAN images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "46M8prJPDEsV" + }, + "outputs": [], + "source": [ + "input_z = inputs['z']\n", + "input_y = inputs['y']\n", + "input_trunc = inputs['truncation']\n", + "\n", + "dim_z = input_z.shape.as_list()[1]\n", + "vocab_size = input_y.shape.as_list()[1]\n", + "\n", + "def truncated_z_sample(batch_size, truncation=1., seed=None):\n", + " state = None if seed is None else np.random.RandomState(seed)\n", + " values = truncnorm.rvs(-2, 2, size=(batch_size, dim_z), random_state=state)\n", + " return truncation * values\n", + "\n", + "def one_hot(index, vocab_size=vocab_size):\n", + " index = np.asarray(index)\n", + " if len(index.shape) == 0:\n", + " index = np.asarray([index])\n", + " assert len(index.shape) == 1\n", + " num = index.shape[0]\n", + " output = np.zeros((num, vocab_size), dtype=np.float32)\n", + " output[np.arange(num), index] = 1\n", + " return output\n", + "\n", + "def one_hot_if_needed(label, vocab_size=vocab_size):\n", + " label = np.asarray(label)\n", + " if len(label.shape) <= 1:\n", + " label = one_hot(label, vocab_size)\n", + " assert len(label.shape) == 2\n", + " return label\n", + "\n", + "def sample(sess, noise, label, truncation=1., batch_size=8,\n", + " vocab_size=vocab_size):\n", + " noise = np.asarray(noise)\n", + " label = np.asarray(label)\n", + " num = noise.shape[0]\n", + " if len(label.shape) == 0:\n", + " label = np.asarray([label] * num)\n", + " if label.shape[0] != num:\n", + " raise ValueError('Got # noise samples ({}) != # label samples ({})'\n", + " .format(noise.shape[0], label.shape[0]))\n", + " label = one_hot_if_needed(label, vocab_size)\n", + " ims = []\n", + " for batch_start in range(0, num, batch_size):\n", + " s = slice(batch_start, min(num, batch_start + batch_size))\n", + " feed_dict = {input_z: noise[s], input_y: label[s], input_trunc: truncation}\n", + " ims.append(sess.run(output, feed_dict=feed_dict))\n", + " ims = np.concatenate(ims, axis=0)\n", + " assert ims.shape[0] == num\n", + " ims = np.clip(((ims + 1) / 2.0) * 256, 0, 255)\n", + " ims = np.uint8(ims)\n", + " return ims\n", + "\n", + "def interpolate(A, B, num_interps):\n", + " if A.shape != B.shape:\n", + " raise ValueError('A and B must have the same shape to interpolate.')\n", + " alphas = np.linspace(0, 1, num_interps)\n", + " return np.array([(1-a)*A + a*B for a in alphas])\n", + "\n", + "def imgrid(imarray, cols=5, pad=1):\n", + " if imarray.dtype != np.uint8:\n", + " raise ValueError('imgrid input imarray must be uint8')\n", + " pad = int(pad)\n", + " assert pad >= 0\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = N // cols + int(N % cols != 0)\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant', constant_values=255)\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " if pad:\n", + " grid = grid[:-pad, :-pad]\n", + " return grid\n", + "\n", + "def imshow(a, format='png', 
jpeg_fallback=True):\n", + " a = np.asarray(a, dtype=np.uint8)\n", + " data = io.BytesIO()\n", + " PIL.Image.fromarray(a).save(data, format)\n", + " im_data = data.getvalue()\n", + " try:\n", + " disp = IPython.display.display(IPython.display.Image(im_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print(('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format))\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uCeCg3Sdf8Nv" + }, + "source": [ + "## Create a TensorFlow session and initialize variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYJor5bOaVn1" + }, + "outputs": [], + "source": [ + "initializer = tf.global_variables_initializer()\n", + "sess = tf.Session()\n", + "sess.run(initializer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SeZ7u3rWd9jz" + }, + "source": [ + "# Explore BigGAN samples of a particular category\n", + "\n", + "Try varying the **`truncation`** value.\n", + "\n", + "(Double-click on the cell to view code.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HuCO9tv3IKT2" + }, + "outputs": [], + "source": [ + "#@title Category-conditional sampling { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "num_samples = 10 #@param {type:\"slider\", min:1, max:20, step:1}\n", + "truncation = 0.4 #@param {type:\"slider\", min:0.02, max:1, step:0.02}\n", + "noise_seed = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category = \"933) cheeseburger\"\n", + "\n", + "z = truncated_z_sample(num_samples, truncation, noise_seed)\n", + "y = int(category.split(')')[0])\n", + "\n", + "ims = sample(sess, z, y, truncation=truncation)\n", + "imshow(imgrid(ims, cols=min(num_samples, 5)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hHNXtvuQgKwa" + }, + "source": [ + "# Interpolate between BigGAN samples\n", + "\n", + "Try setting different **`category`**s with the same **`noise_seed`**s, or the same **`category`**s with different **`noise_seed`**s. 
Or go wild and set both any way you like!\n", + "\n", + "(Double-click on the cell to view code.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dSAyfDfnVugs" + }, + "outputs": [], + "source": [ + "#@title Interpolation { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "num_samples = 2 #@param {type:\"slider\", min:1, max:5, step:1}\n", + "num_interps = 5 #@param {type:\"slider\", min:2, max:10, step:1}\n", + "truncation = 0.2 #@param {type:\"slider\", min:0.02, max:1, step:0.02}\n", + "noise_seed_A = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category_A = \"207) golden retriever\"\n", + "noise_seed_B = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category_B = \"8) hen\"\n", + "\n", + "def interpolate_and_shape(A, B, num_interps):\n", + " interps = interpolate(A, B, num_interps)\n", + " return (interps.transpose(1, 0, *range(2, len(interps.shape)))\n", + " .reshape(num_samples * num_interps, *interps.shape[2:]))\n", + "\n", + "z_A, z_B = [truncated_z_sample(num_samples, truncation, noise_seed)\n", + " for noise_seed in [noise_seed_A, noise_seed_B]]\n", + "y_A, y_B = [one_hot([int(category.split(')')[0])] * num_samples)\n", + " for category in [category_A, category_B]]\n", + "\n", + "z_interp = interpolate_and_shape(z_A, z_B, num_interps)\n", + "y_interp = interpolate_and_shape(y_A, y_B, num_interps)\n", + "\n", + "ims = sample(sess, z_interp, y_interp, truncation=truncation)\n", + "imshow(imgrid(ims, cols=num_interps))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "pLOYL1PJAAtK" + ], + "name": "biggan_generation_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bird_vocalization_classifier.ipynb b/site/en/hub/tutorials/bird_vocalization_classifier.ipynb new file mode 100644 index 00000000000..563be9b425a --- /dev/null +++ b/site/en/hub/tutorials/bird_vocalization_classifier.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QD3FvutQsaqc" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-5fm9kVRsfuG" + }, + "outputs": [], + "source": [ + "#@title Copyright 2023 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QNDQZiSGtXMu" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1JAO_rv_QEBr" + }, + "source": [ + "# Using Google Bird Vocalization model\n", + "\n", + "The Google Bird Vocalization is a global bird embedding and classification model.\n", + "\n", + "This model expects as input a 5-second audio segment sampled at 32kHz\n", + "\n", + "The model outputs both the logits and the embeddigs for each input window of audio.\n", + "\n", + "On this notebook you'll learn how to feed the audio properly to the model and how to use the logits for inference.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bytIYq0MjEKT" + }, + "outputs": [], + "source": [ + "!pip install -q \"tensorflow_io==0.28.*\"\n", + "!pip install -q librosa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aXXTdq-eq6lk" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_io as tfio\n", + "\n", + "import numpy as np\n", + "import librosa\n", + "\n", + "import csv\n", + "import io\n", + "\n", + "from IPython.display import Audio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B6mFpgMWQjgk" + }, + "source": [ + "Loading the Model from TFHub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CQ1P3IkpQiya" + }, + "outputs": [], + "source": [ + "model_handle = \"https://tfhub.dev/google/bird-vocalization-classifier/1\"\n", + "model = hub.load(model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3OOw23B3fZT6" + }, + "source": [ + "Lets load the labels that the model was trained on.\n", + "\n", + "The labels file is in the assets forlder under label.csv. Each line is an ebird id." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5i-R4k9ZhwN" + }, + "outputs": [], + "source": [ + "# Find the name of the class with the top score when mean-aggregated across frames.\n", + "def class_names_from_csv(class_map_csv_text):\n", + " \"\"\"Returns list of class names corresponding to score vector.\"\"\"\n", + " with open(labels_path) as csv_file:\n", + " csv_reader = csv.reader(csv_file, delimiter=',')\n", + " class_names = [mid for mid, desc in csv_reader]\n", + " return class_names[1:]\n", + "\n", + "labels_path = hub.resolve(model_handle) + \"/assets/label.csv\"\n", + "classes = class_names_from_csv(labels_path)\n", + "print(classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b2JYPafeRRi_" + }, + "source": [ + "The ```frame_audio``` function is based on the [Chirp lib](https://github.com/google-research/chirp/blob/10c5faa325a3c3468fa6f18a736fc1aeb9bf8129/chirp/inference/interface.py#L128) version but using tf.signal instead of librosa.\n", + "\n", + "The `ensure_sample_rate` is a function to make sure that any audio used with the model has the expected sample rate of 32kHz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t65gi_DTrRaa" + }, + "outputs": [], + "source": [ + "def frame_audio(\n", + " audio_array: np.ndarray,\n", + " window_size_s: float = 5.0,\n", + " hop_size_s: float = 5.0,\n", + " sample_rate = 32000,\n", + " ) -> np.ndarray:\n", + " \"\"\"Helper function for framing audio for inference.\"\"\"\n", + " if window_size_s is None or window_size_s < 0:\n", + " return audio_array[np.newaxis, :]\n", + " frame_length = int(window_size_s * sample_rate)\n", + " hop_length = int(hop_size_s * sample_rate)\n", + " framed_audio = tf.signal.frame(audio_array, frame_length, hop_length, pad_end=True)\n", + " return framed_audio\n", + "\n", + "def ensure_sample_rate(waveform, original_sample_rate,\n", + " desired_sample_rate=32000):\n", + " \"\"\"Resample waveform if required.\"\"\"\n", + " if original_sample_rate != desired_sample_rate:\n", + " waveform = tfio.audio.resample(waveform, original_sample_rate, desired_sample_rate)\n", + " return desired_sample_rate, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G7uAuI4f6ehb" + }, + "source": [ + "Lets load a file from Wikipedia.\n", + "\n", + "To be more precise, the audio of a [Common Blackbird](https://es.wikipedia.org/wiki/Turdus_merula)\n", + "\n", + "|
 Common Blackbird |\n", + "|:--:|\n", + "| *By Andreas Trepte - Own work, CC BY-SA 2.5, Link*
|\n", + "\n", + "\n", + "The audio was contributed by Oona Räisänen (Mysid) under the public domain license." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "whkmGeJ9lmyd" + }, + "outputs": [], + "source": [ + "!curl -O \"https://upload.wikimedia.org/wikipedia/commons/7/7c/Turdus_merula_2.ogg\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ff6nOV2EurAO" + }, + "outputs": [], + "source": [ + "turdus_merula = \"Turdus_merula_2.ogg\"\n", + "\n", + "audio, sample_rate = librosa.load(turdus_merula)\n", + "\n", + "sample_rate, wav_data_turdus = ensure_sample_rate(audio, sample_rate)\n", + "Audio(wav_data_turdus, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sjpKLk9K7TTV" + }, + "source": [ + "The audio has 24 seconds and the model expects chunks of 5 seconds.\n", + "\n", + "The `frame_audio` function can fix that and split the audio in proper frames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VzgK0xWw9g8X" + }, + "outputs": [], + "source": [ + "fixed_tm = frame_audio(wav_data_turdus)\n", + "fixed_tm.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rU5-UqaCAVZ7" + }, + "source": [ + "Let's apply the model only on the first frame:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0zveWSOU9QBC" + }, + "outputs": [], + "source": [ + "logits, embeddings = model.infer_tf(fixed_tm[:1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osmRNWciEEuG" + }, + "source": [ + "The label.csv file contains ebirds ids.\n", + "The ebird id for Turdus Merula is eurbla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E-UehjA6Acn_" + }, + "outputs": [], + "source": [ + "probabilities = tf.nn.softmax(logits)\n", + "argmax = np.argmax(probabilities)\n", + "print(f\"The audio is from the class {classes[argmax]} (element:{argmax} in the label.csv file), with probability of {probabilities[0][argmax]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bGK84egXBg2f" + }, + "source": [ + "Lets apply the model on all the frames now:\n", + "\n", + "*note*: this code is also based on the [Chirp library](https://github.com/google-research/chirp/blob/d6ff5e7cee3865940f31697bf4b70176c1072572/chirp/inference/models.py#L174)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UT_Im9i50EGy" + }, + "outputs": [], + "source": [ + "all_logits, all_embeddings = model.infer_tf(fixed_tm[:1])\n", + "for window in fixed_tm[1:]:\n", + " logits, embeddings = model.infer_tf(window[np.newaxis, :])\n", + " all_logits = np.concatenate([all_logits, logits], axis=0)\n", + "\n", + "all_logits.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kKuJWq4SxyR1" + }, + "outputs": [], + "source": [ + "frame = 0\n", + "for frame_logits in all_logits:\n", + " probabilities = tf.nn.softmax(frame_logits)\n", + " argmax = np.argmax(probabilities)\n", + " print(f\"For frame {frame}, the audio is from the class {classes[argmax]} (element:{argmax} in the label.csv file), with probability of {probabilities[argmax]}\")\n", + " frame += 1" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "bird_vocalization_classifier.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + 
"nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/boundless.ipynb b/site/en/hub/tutorials/boundless.ipynb new file mode 100644 index 00000000000..f53fc5bb004 --- /dev/null +++ b/site/en/hub/tutorials/boundless.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9veUEV0CfmHX" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "BlCInyRifxHS" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_LRMeRxCfzC4" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QOjczJJ4gWHS" + }, + "source": [ + "# Boundless Colab\n", + "\n", + "Welcome to the Boundless model Colab! This notebook will take you through the steps of running the model on images and visualize the results.\n", + "\n", + "## Overview\n", + "\n", + "Boundless is a model for image extrapolation. This model takes an image, internally masks a portion of it ([1/2](https://tfhub.dev/google/boundless/half/1), [1/4](https://tfhub.dev/google/boundless/quarter/1), [3/4](https://tfhub.dev/google/boundless/three_quarter/1)) and completes the masked part. For more details refer to [Boundless: Generative Adversarial Networks for Image Extension](https://arxiv.org/pdf/1908.07007.pdf) or the model documentation on TensorFlow Hub." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hDKbpAEZf8Lt" + }, + "source": [ + "## Imports and setup\n", + "\n", + "Start with the base imports:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xJMFtTqPr7lf" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from io import BytesIO\n", + "from PIL import Image as PilImage\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "from six.moves.urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pigUDIXtciQO" + }, + "source": [ + "## Create a function for reading an image\n", + "\n", + "Create a utility function to help load an image and format it for the model (257x257x3). This method will also crop the image to a square to avoid distortion and you can use it with local images or from the internet." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KTEVPgXH6rtV" + }, + "outputs": [], + "source": [ + " def read_image(filename):\n", + " fd = None\n", + " if(filename.startswith('http')):\n", + " fd = urlopen(filename)\n", + " else:\n", + " fd = tf.io.gfile.GFile(filename, 'rb')\n", + "\n", + " pil_image = PilImage.open(fd)\n", + " width, height = pil_image.size\n", + " # crop to make the image square\n", + " pil_image = pil_image.crop((0, 0, height, height))\n", + " pil_image = pil_image.resize((257,257),PilImage.LANCZOS)\n", + " image_unscaled = np.array(pil_image)\n", + " image_np = np.expand_dims(\n", + " image_unscaled.astype(np.float32) / 255., axis=0)\n", + " return image_np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lonrLxuKcsL0" + }, + "source": [ + "## Create a visualization function\n", + "\n", + "Create a visualization function to show the original image side-by-side with the masked version and the \"filled\" version, both generated by the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j7AkoMFG7r-O" + }, + "outputs": [], + "source": [ + "def visualize_output_comparison(img_original, img_masked, img_filled):\n", + " plt.figure(figsize=(24,12))\n", + " plt.subplot(131)\n", + " plt.imshow((np.squeeze(img_original)))\n", + " plt.title(\"Original\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.subplot(132)\n", + " plt.imshow((np.squeeze(img_masked)))\n", + " plt.title(\"Masked\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.subplot(133)\n", + " plt.imshow((np.squeeze(img_filled)))\n", + " plt.title(\"Generated\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8rwaCWmxdJGH" + }, + "source": [ + "## Load an image\n", + "\n", + "Now you can load a sample image. Feel free to use your own image by uploading it to the Colab notebook. Remember that the model may have some limitations regarding human images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "92w-Jfbm60XA" + }, + "outputs": [], + "source": [ + "wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/3/31/Nusfjord_road%2C_2010_09.jpg/800px-Nusfjord_road%2C_2010_09.jpg\"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/Beech_forest_M%C3%A1tra_in_winter.jpg/640px-Beech_forest_M%C3%A1tra_in_winter.jpg\"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/b/b2/Marmolada_Sunset.jpg/640px-Marmolada_Sunset.jpg\"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/9/9d/Aegina_sunset.jpg/640px-Aegina_sunset.jpg\"\n", + "\n", + "input_img = read_image(wikimedia)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4lIkmZL_dtyX" + }, + "source": [ + "## Select a model from TensorFlow Hub\n", + "\n", + "On TensorFlow Hub there are three versions of the Boundless model: Half, Quarter and Three Quarters.\n", + "In the following cell you can choose any of the models and apply them on your image. If you want to pick another model, select it below and then run the following cells." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B3myNctEQ5GE" + }, + "outputs": [], + "source": [ + "#@title Model Selection { display-mode: \"form\" }\n", + "model_name = 'Boundless Quarter' # @param ['Boundless Half', 'Boundless Quarter', 'Boundless Three Quarters']\n", + "model_handle_map = {\n", + " 'Boundless Half' : 'https://tfhub.dev/google/boundless/half/1',\n", + " 'Boundless Quarter' : 'https://tfhub.dev/google/boundless/quarter/1', \n", + " 'Boundless Three Quarters' : 'https://tfhub.dev/google/boundless/three_quarter/1'\n", + "}\n", + "\n", + "model_handle = model_handle_map[model_name]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aSJFeNNSeOn8" + }, + "source": [ + "After choosing your model, you can load it from TensorFlow Hub.\n", + "\n", + "**Note**: You can point to a model handle to read the model's documentation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0IDKMNyYSWsj" + }, + "outputs": [], + "source": [ + "print(\"Loading model {} ({})\".format(model_name, model_handle))\n", + "model = hub.load(model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L4G7CPOaeuQb" + }, + "source": [ + "## Perform inference\n", + "\n", + "The boundless model has two outputs:\n", + "\n", + "* The input image with a mask applied\n", + "* The masked image with the extrapolation to complete it\n", + "\n", + "You can compare these two images with a visualization as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W7uCAuKxSd-M" + }, + "outputs": [], + "source": [ + "result = model.signatures['default'](tf.constant(input_img))\n", + "generated_image = result['default']\n", + "masked_image = result['masked_image']\n", + "\n", + "visualize_output_comparison(input_img, masked_image, generated_image)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "boundless.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cord_19_embeddings.ipynb b/site/en/hub/tutorials/cord_19_embeddings.ipynb new file mode 100644 index 00000000000..01f43e5f9a9 --- /dev/null +++ b/site/en/hub/tutorials/cord_19_embeddings.ipynb @@ -0,0 +1,537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5wFF5JFyD2Ki" + }, + "source": [ + "#### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uf6NouXxDqGk" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "# Exploring the TF-Hub CORD-19 Swivel Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9VusdTAH0isl" + }, + "source": [ + "The CORD-19 Swivel text embedding module from TF-Hub (https://tfhub.dev/tensorflow/cord-19/swivel-128d/1)\n", + " was built to support researchers analyzing natural languages text related to COVID-19.\n", + "These embeddings were trained on the titles, authors, abstracts, body texts, and\n", + "reference titles of articles in the [CORD-19 dataset](https://api.semanticscholar.org/CorpusID:216056360).\n", + "\n", + "In this colab we will:\n", + "- Analyze semantically similar words in the embedding space\n", + "- Train a classifier on the SciCite dataset using the CORD-19 embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L69VQv2Z0isl" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ym2nXOPuPV__" + }, + "outputs": [], + "source": [ + "import functools\n", + "import itertools\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_eager_execution()\n", + "tf.logging.set_verbosity('ERROR')\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub\n", + "\n", + "try:\n", + " from google.colab import data_table\n", + " def display_df(df):\n", + " return data_table.DataTable(df, include_index=False)\n", + "except ModuleNotFoundError:\n", + " # If google-colab is not available, just display the raw DataFrame\n", + " def display_df(df):\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VgRRf2I7tER" + }, + "source": [ + "# Analyze the embeddings\n", + "\n", + "Let's start off by analyzing the embedding by calculating and plotting a correlation matrix between different terms. If the embedding learned to successfully capture the meaning of different words, the embedding vectors of semantically similar words should be close together. Let's take a look at some COVID-19 related terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HNN_9bBKSLHU" + }, + "outputs": [], + "source": [ + "# Use the inner product between two embedding vectors as the similarity measure\n", + "def plot_correlation(labels, features):\n", + " corr = np.inner(features, features)\n", + " corr /= np.max(corr)\n", + " sns.heatmap(corr, xticklabels=labels, yticklabels=labels)\n", + "\n", + "\n", + "with tf.Graph().as_default():\n", + " # Load the module\n", + " query_input = tf.placeholder(tf.string)\n", + " module = hub.Module('https://tfhub.dev/tensorflow/cord-19/swivel-128d/1')\n", + " embeddings = module(query_input)\n", + "\n", + " with tf.train.MonitoredTrainingSession() as sess:\n", + "\n", + " # Generate embeddings for some terms\n", + " queries = [\n", + " # Related viruses\n", + " \"coronavirus\", \"SARS\", \"MERS\",\n", + " # Regions\n", + " \"Italy\", \"Spain\", \"Europe\",\n", + " # Symptoms\n", + " \"cough\", \"fever\", \"throat\"\n", + " ]\n", + "\n", + " features = sess.run(embeddings, feed_dict={query_input: queries})\n", + " plot_correlation(queries, features)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bg-PGqtm8B7K" + }, + "source": [ + "We can see that the embedding successfully captured the meaning of the different terms. Each word is similar to the other words of its cluster (i.e. 
\"coronavirus\" highly correlates with \"SARS\" and \"MERS\"), while they are different from terms of other clusters (i.e. the similarity between \"SARS\" and \"Spain\" is close to 0).\n", + "\n", + "Now let's see how we can use these embeddings to solve a specific task." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "idJ1jFmH7xMa" + }, + "source": [ + "## SciCite: Citation Intent Classification\n", + "\n", + "This section shows how one can use the embedding for downstream tasks such as text classification. We'll use the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) from TensorFlow Datasets to classify citation intents in academic papers. Given a sentence with a citation from an academic paper, classify whether the main intent of the citation is as background information, use of methods, or comparing results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-FB19HLfVp2V" + }, + "outputs": [], + "source": [ + "#@title Set up the dataset from TFDS\n", + "\n", + "class Dataset:\n", + " \"\"\"Build a dataset from a TFDS dataset.\"\"\"\n", + " def __init__(self, tfds_name, feature_name, label_name):\n", + " self.dataset_builder = tfds.builder(tfds_name)\n", + " self.dataset_builder.download_and_prepare()\n", + " self.feature_name = feature_name\n", + " self.label_name = label_name\n", + " \n", + " def get_data(self, for_eval):\n", + " splits = THE_DATASET.dataset_builder.info.splits\n", + " if tfds.Split.TEST in splits:\n", + " split = tfds.Split.TEST if for_eval else tfds.Split.TRAIN\n", + " else:\n", + " SPLIT_PERCENT = 80\n", + " split = \"train[{}%:]\".format(SPLIT_PERCENT) if for_eval else \"train[:{}%]\".format(SPLIT_PERCENT)\n", + " return self.dataset_builder.as_dataset(split=split)\n", + "\n", + " def num_classes(self):\n", + " return self.dataset_builder.info.features[self.label_name].num_classes\n", + "\n", + " def class_names(self):\n", + " return self.dataset_builder.info.features[self.label_name].names\n", + "\n", + " def preprocess_fn(self, data):\n", + " return data[self.feature_name], data[self.label_name]\n", + "\n", + " def example_fn(self, data):\n", + " feature, label = self.preprocess_fn(data)\n", + " return {'feature': feature, 'label': label}, label\n", + "\n", + "\n", + "def get_example_data(dataset, num_examples, **data_kw):\n", + " \"\"\"Show example data\"\"\"\n", + " with tf.Session() as sess:\n", + " batched_ds = dataset.get_data(**data_kw).take(num_examples).map(dataset.preprocess_fn).batch(num_examples)\n", + " it = tf.data.make_one_shot_iterator(batched_ds).get_next()\n", + " data = sess.run(it)\n", + " return data\n", + "\n", + "\n", + "TFDS_NAME = 'scicite' #@param {type: \"string\"}\n", + "TEXT_FEATURE_NAME = 'string' #@param {type: \"string\"}\n", + "LABEL_NAME = 'label' #@param {type: \"string\"}\n", + "THE_DATASET = Dataset(TFDS_NAME, TEXT_FEATURE_NAME, LABEL_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "CVjyBD0ZPh4Z" + }, + "outputs": [], + "source": [ + "#@title Let's take a look at a few labeled examples from the training set\n", + "NUM_EXAMPLES = 20 #@param {type:\"integer\"}\n", + "data = get_example_data(THE_DATASET, NUM_EXAMPLES, for_eval=False)\n", + "display_df(\n", + " pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [ex.decode('utf8') for ex in data[0]],\n", + " LABEL_NAME: [THE_DATASET.class_names()[x] for x in data[1]]\n", + " }))" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "65s9UpYJ_1ct" + }, + "source": [ + "## Training a citaton intent classifier\n", + "\n", + "We'll train a classifier on the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) using an Estimator. Let's set up the input_fns to read the dataset into the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "OldapWmKSGsW" + }, + "outputs": [], + "source": [ + "def preprocessed_input_fn(for_eval):\n", + " data = THE_DATASET.get_data(for_eval=for_eval)\n", + " data = data.map(THE_DATASET.example_fn, num_parallel_calls=1)\n", + " return data\n", + "\n", + "\n", + "def input_fn_train(params):\n", + " data = preprocessed_input_fn(for_eval=False)\n", + " data = data.repeat(None)\n", + " data = data.shuffle(1024)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data\n", + "\n", + "\n", + "def input_fn_eval(params):\n", + " data = preprocessed_input_fn(for_eval=True)\n", + " data = data.repeat(1)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data\n", + "\n", + "\n", + "def input_fn_predict(params):\n", + " data = preprocessed_input_fn(for_eval=True)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KcrmWUkVKg2u" + }, + "source": [ + "Let's build a model which use the CORD-19 embeddings with a classification layer on top." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ff0uKqJCA9zh" + }, + "outputs": [], + "source": [ + "def model_fn(features, labels, mode, params):\n", + " # Embed the text\n", + " embed = hub.Module(params['module_name'], trainable=params['trainable_module'])\n", + " embeddings = embed(features['feature'])\n", + "\n", + " # Add a linear layer on top\n", + " logits = tf.layers.dense(\n", + " embeddings, units=THE_DATASET.num_classes(), activation=None)\n", + " predictions = tf.argmax(input=logits, axis=1)\n", + "\n", + " if mode == tf.estimator.ModeKeys.PREDICT:\n", + " return tf.estimator.EstimatorSpec(\n", + " mode=mode,\n", + " predictions={\n", + " 'logits': logits,\n", + " 'predictions': predictions,\n", + " 'features': features['feature'],\n", + " 'labels': features['label']\n", + " })\n", + " \n", + " # Set up a multi-class classification head\n", + " loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + " labels=labels, logits=logits)\n", + " loss = tf.reduce_mean(loss)\n", + "\n", + " if mode == tf.estimator.ModeKeys.TRAIN:\n", + " optimizer = tf.train.GradientDescentOptimizer(learning_rate=params['learning_rate'])\n", + " train_op = optimizer.minimize(loss, global_step=tf.train.get_or_create_global_step())\n", + " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", + "\n", + " elif mode == tf.estimator.ModeKeys.EVAL:\n", + " accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)\n", + " precision = tf.metrics.precision(labels=labels, predictions=predictions)\n", + " recall = tf.metrics.recall(labels=labels, predictions=predictions)\n", + "\n", + " return tf.estimator.EstimatorSpec(\n", + " mode=mode,\n", + " loss=loss,\n", + " eval_metric_ops={\n", + " 'accuracy': accuracy,\n", + " 'precision': precision,\n", + " 'recall': recall,\n", + " })\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yZUclu8xBYlj" + }, + "outputs": [], + "source": [ + "#@title Hyperparmeters { run: 
\"auto\" }\n", + "\n", + "EMBEDDING = 'https://tfhub.dev/tensorflow/cord-19/swivel-128d/1' #@param {type: \"string\"}\n", + "TRAINABLE_MODULE = False #@param {type: \"boolean\"}\n", + "STEPS = 8000#@param {type: \"integer\"}\n", + "EVAL_EVERY = 200 #@param {type: \"integer\"}\n", + "BATCH_SIZE = 10 #@param {type: \"integer\"}\n", + "LEARNING_RATE = 0.01 #@param {type: \"number\"}\n", + "\n", + "params = {\n", + " 'batch_size': BATCH_SIZE,\n", + " 'learning_rate': LEARNING_RATE,\n", + " 'module_name': EMBEDDING,\n", + " 'trainable_module': TRAINABLE_MODULE\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weZKWK-pLBll" + }, + "source": [ + "## Train and evaluate the model\n", + "\n", + "Let's train and evaluate the model to see the performance on the SciCite task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cO1FWkZW2WS9" + }, + "outputs": [], + "source": [ + "estimator = tf.estimator.Estimator(functools.partial(model_fn, params=params))\n", + "metrics = []\n", + "\n", + "for step in range(0, STEPS, EVAL_EVERY):\n", + " estimator.train(input_fn=functools.partial(input_fn_train, params=params), steps=EVAL_EVERY)\n", + " step_metrics = estimator.evaluate(input_fn=functools.partial(input_fn_eval, params=params))\n", + " print('Global step {}: loss {:.3f}, accuracy {:.3f}'.format(step, step_metrics['loss'], step_metrics['accuracy']))\n", + " metrics.append(step_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RUNGAeyf1ygC" + }, + "outputs": [], + "source": [ + "global_steps = [x['global_step'] for x in metrics]\n", + "fig, axes = plt.subplots(ncols=2, figsize=(20,8))\n", + "\n", + "for axes_index, metric_names in enumerate([['accuracy', 'precision', 'recall'],\n", + " ['loss']]):\n", + " for metric_name in metric_names:\n", + " axes[axes_index].plot(global_steps, [x[metric_name] for x in metrics], label=metric_name)\n", + " axes[axes_index].legend()\n", + " axes[axes_index].set_xlabel(\"Global Step\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1biWylvB6ayg" + }, + "source": [ + "We can see that the loss quickly decreases while especially the accuracy rapidly increases. Let's plot some examples to check how the prediction relates to the true labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zK_NJXtoyG2o" + }, + "outputs": [], + "source": [ + "predictions = estimator.predict(functools.partial(input_fn_predict, params))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nlxFER_Oriam" + }, + "outputs": [], + "source": [ + "first_10_predictions = list(itertools.islice(predictions, 10))\n", + "\n", + "display_df(\n", + " pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [pred['features'].decode('utf8') for pred in first_10_predictions],\n", + " LABEL_NAME: [THE_DATASET.class_names()[pred['labels']] for pred in first_10_predictions],\n", + " 'prediction': [THE_DATASET.class_names()[pred['predictions']] for pred in first_10_predictions]\n", + " }))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OSGcrkE069_Q" + }, + "source": [ + "We can see that for this random sample, the model predicts the correct label most of the times, indicating that it can embed scientific sentences pretty well." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLE0kCfO5CIA" + }, + "source": [ + "# What's next?\n", + "\n", + "Now that you've gotten to know a bit more about the CORD-19 Swivel embeddings from TF-Hub, we encourage you to participate in the CORD-19 Kaggle competition to contribute to gaining scientific insights from COVID-19 related academic texts.\n", + "\n", + "* Participate in the [CORD-19 Kaggle Challenge](https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge)\n", + "* Learn more about the [COVID-19 Open Research Dataset (CORD-19)](https://api.semanticscholar.org/CorpusID:216056360)\n", + "* See documentation and more about the TF-Hub embeddings at https://tfhub.dev/tensorflow/cord-19/swivel-128d/1\n", + "* Explore the CORD-19 embedding space with the [TensorFlow Embedding Projector](http://projector.tensorflow.org/?config=https://storage.googleapis.com/tfhub-examples/tensorflow/cord-19/swivel-128d/1/tensorboard/full_projector_config.json)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "5wFF5JFyD2Ki" + ], + "name": "cord_19_embeddings.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb b/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb new file mode 100644 index 00000000000..388de741e34 --- /dev/null +++ b/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5wFF5JFyD2Ki" + }, + "source": [ + "#### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uf6NouXxDqGk" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "# Exploring the TF-Hub CORD-19 Swivel Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yI6Mh3-P0_Pk" + }, + "source": [ + "The CORD-19 Swivel text embedding module from TF-Hub (https://tfhub.dev/tensorflow/cord-19/swivel-128d/3)\n", + " was built to support researchers analyzing natural languages text related to COVID-19.\n", + "These embeddings were trained on the titles, authors, abstracts, body texts, and\n", + "reference titles of articles in the [CORD-19 dataset](https://api.semanticscholar.org/CorpusID:216056360).\n", + "\n", + "In this colab we will:\n", + "- Analyze semantically similar words in the embedding space\n", + "- Train a classifier on the SciCite dataset using the CORD-19 embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gVWOrccw0_Pl" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ym2nXOPuPV__" + }, + "outputs": [], + "source": [ + "import functools\n", + "import itertools\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub\n", + "\n", + "from tqdm import trange" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VgRRf2I7tER" + }, + "source": [ + "# Analyze the embeddings\n", + "\n", + "Let's start off by analyzing the embedding by calculating and plotting a correlation matrix between different terms. If the embedding learned to successfully capture the meaning of different words, the embedding vectors of semantically similar words should be close together. Let's take a look at some COVID-19 related terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HNN_9bBKSLHU" + }, + "outputs": [], + "source": [ + "# Use the inner product between two embedding vectors as the similarity measure\n", + "def plot_correlation(labels, features):\n", + " corr = np.inner(features, features)\n", + " corr /= np.max(corr)\n", + " sns.heatmap(corr, xticklabels=labels, yticklabels=labels)\n", + "\n", + "# Generate embeddings for some terms\n", + "queries = [\n", + " # Related viruses\n", + " 'coronavirus', 'SARS', 'MERS',\n", + " # Regions\n", + " 'Italy', 'Spain', 'Europe',\n", + " # Symptoms\n", + " 'cough', 'fever', 'throat'\n", + "]\n", + "\n", + "module = hub.load('https://tfhub.dev/tensorflow/cord-19/swivel-128d/3')\n", + "embeddings = module(queries)\n", + "\n", + "plot_correlation(queries, embeddings)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bg-PGqtm8B7K" + }, + "source": [ + "We can see that the embedding successfully captured the meaning of the different terms. Each word is similar to the other words of its cluster (i.e. \"coronavirus\" highly correlates with \"SARS\" and \"MERS\"), while they are different from terms of other clusters (i.e. the similarity between \"SARS\" and \"Spain\" is close to 0).\n", + "\n", + "Now let's see how we can use these embeddings to solve a specific task." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "idJ1jFmH7xMa" + }, + "source": [ + "## SciCite: Citation Intent Classification\n", + "\n", + "This section shows how one can use the embedding for downstream tasks such as text classification. We'll use the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) from TensorFlow Datasets to classify citation intents in academic papers. 
Given a sentence with a citation from an academic paper, classify whether the main intent of the citation is as background information, use of methods, or comparing results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ghc-CzT8DDaZ" + }, + "outputs": [], + "source": [ + "builder = tfds.builder(name='scicite')\n", + "builder.download_and_prepare()\n", + "train_data, validation_data, test_data = builder.as_dataset(\n", + " split=('train', 'validation', 'test'),\n", + " as_supervised=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CVjyBD0ZPh4Z" + }, + "outputs": [], + "source": [ + "#@title Let's take a look at a few labeled examples from the training set\n", + "NUM_EXAMPLES = 10#@param {type:\"integer\"}\n", + "\n", + "TEXT_FEATURE_NAME = builder.info.supervised_keys[0]\n", + "LABEL_NAME = builder.info.supervised_keys[1]\n", + "\n", + "def label2str(numeric_label):\n", + " m = builder.info.features[LABEL_NAME].names\n", + " return m[numeric_label]\n", + "\n", + "data = next(iter(train_data.batch(NUM_EXAMPLES)))\n", + "\n", + "\n", + "pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [ex.numpy().decode('utf8') for ex in data[0]],\n", + " LABEL_NAME: [label2str(x) for x in data[1]]\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "65s9UpYJ_1ct" + }, + "source": [ + "## Training a citaton intent classifier\n", + "\n", + "We'll train a classifier on the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) using Keras. Let's build a model which use the CORD-19 embeddings with a classification layer on top." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yZUclu8xBYlj" + }, + "outputs": [], + "source": [ + "#@title Hyperparameters { run: \"auto\" }\n", + "\n", + "EMBEDDING = 'https://tfhub.dev/tensorflow/cord-19/swivel-128d/3' #@param {type: \"string\"}\n", + "TRAINABLE_MODULE = False #@param {type: \"boolean\"}\n", + "\n", + "hub_layer = hub.KerasLayer(EMBEDDING, input_shape=[], \n", + " dtype=tf.string, trainable=TRAINABLE_MODULE)\n", + "\n", + "model = tf.keras.Sequential()\n", + "model.add(hub_layer)\n", + "model.add(tf.keras.layers.Dense(3))\n", + "model.summary()\n", + "model.compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weZKWK-pLBll" + }, + "source": [ + "## Train and evaluate the model\n", + "\n", + "Let's train and evaluate the model to see the performance on the SciCite task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cO1FWkZW2WS9" + }, + "outputs": [], + "source": [ + "EPOCHS = 35#@param {type: \"integer\"}\n", + "BATCH_SIZE = 32#@param {type: \"integer\"}\n", + "\n", + "history = model.fit(train_data.shuffle(10000).batch(BATCH_SIZE),\n", + " epochs=EPOCHS,\n", + " validation_data=validation_data.batch(BATCH_SIZE),\n", + " verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2sKE7kEyLJQZ" + }, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "def display_training_curves(training, validation, title, subplot):\n", + " if subplot%10==1: # set up the subplots on the first call\n", + " plt.subplots(figsize=(10,10), facecolor='#F0F0F0')\n", + " plt.tight_layout()\n", + " ax = plt.subplot(subplot)\n", + " ax.set_facecolor('#F8F8F8')\n", + " 
ax.plot(training)\n", + " ax.plot(validation)\n", + " ax.set_title('model '+ title)\n", + " ax.set_ylabel(title)\n", + " ax.set_xlabel('epoch')\n", + " ax.legend(['train', 'valid.'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nnQfxevhLKld" + }, + "outputs": [], + "source": [ + "display_training_curves(history.history['accuracy'], history.history['val_accuracy'], 'accuracy', 211)\n", + "display_training_curves(history.history['loss'], history.history['val_loss'], 'loss', 212)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BjvtOw72Lpyw" + }, + "source": [ + "## Evaluate the model\n", + "\n", + "And let's see how the model performs. Two values will be returned. Loss (a number which represents our error, lower values are better), and accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y0ExC8D0LX8m" + }, + "outputs": [], + "source": [ + "results = model.evaluate(test_data.batch(512), verbose=2)\n", + "\n", + "for name, value in zip(model.metrics_names, results):\n", + " print('%s: %.3f' % (name, value))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dWp5OWeTL2EW" + }, + "source": [ + "We can see that the loss quickly decreases while especially the accuracy rapidly increases. Let's plot some examples to check how the prediction relates to the true labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VzHzAOaaOVC0" + }, + "outputs": [], + "source": [ + "prediction_dataset = next(iter(test_data.batch(20)))\n", + "\n", + "prediction_texts = [ex.numpy().decode('utf8') for ex in prediction_dataset[0]]\n", + "prediction_labels = [label2str(x) for x in prediction_dataset[1]]\n", + "\n", + "predictions = [\n", + " label2str(x) for x in np.argmax(model.predict(prediction_texts), axis=-1)]\n", + "\n", + "\n", + "pd.DataFrame({\n", + " TEXT_FEATURE_NAME: prediction_texts,\n", + " LABEL_NAME: prediction_labels,\n", + " 'prediction': predictions\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OSGcrkE069_Q" + }, + "source": [ + "We can see that for this random sample, the model predicts the correct label most of the times, indicating that it can embed scientific sentences pretty well." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLE0kCfO5CIA" + }, + "source": [ + "# What's next?\n", + "\n", + "Now that you've gotten to know a bit more about the CORD-19 Swivel embeddings from TF-Hub, we encourage you to participate in the CORD-19 Kaggle competition to contribute to gaining scientific insights from COVID-19 related academic texts.\n", + "\n", + "* Participate in the [CORD-19 Kaggle Challenge](https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge)\n", + "* Learn more about the [COVID-19 Open Research Dataset (CORD-19)](https://api.semanticscholar.org/CorpusID:216056360)\n", + "* See documentation and more about the TF-Hub embeddings at https://tfhub.dev/tensorflow/cord-19/swivel-128d/3\n", + "* Explore the CORD-19 embedding space with the [TensorFlow Embedding Projector](http://projector.tensorflow.org/?config=https://storage.googleapis.com/tfhub-examples/tensorflow/cord-19/swivel-128d/3/tensorboard/projector_config.json)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "cord_19_embeddings_keras.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cropnet_cassava.ipynb b/site/en/hub/tutorials/cropnet_cassava.ipynb new file mode 100644 index 00000000000..926b5395e41 --- /dev/null +++ b/site/en/hub/tutorials/cropnet_cassava.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vtNtfcHHoHNP" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jZwnHZ70oUIM" + }, + "source": [ + "# CropNet: Cassava Disease Detection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6sg9wHP9oR3q" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "grEgSWu2iTxm" + }, + "source": [ + "This notebook shows how to use the CropNet [cassava disease classifier](https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2) model from **TensorFlow Hub**. The model classifies images of cassava leaves into one of 6 classes: *bacterial blight, brown streak disease, green mite, mosaic disease, healthy, or unknown*.\n", + "\n", + "This colab demonstrates how to:\n", + " * Load the https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2 model from **TensorFlow Hub**\n", + " * Load the [cassava](https://www.tensorflow.org/datasets/catalog/cassava) dataset from **TensorFlow Datasets (TFDS)**\n", + " * Classify images of cassava leaves into 4 distinct cassava disease categories or as healthy or unknown.\n", + " * Evaluate the *accuracy* of the classifier and look at how *robust* the model is when applied to out of domain images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bKn4Fiq2OD7u" + }, + "source": [ + "## Imports and setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LMgjpSoYqJIz" + }, + "outputs": [], + "source": [ + "!pip install matplotlib==3.2.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "FIP4rkjp45MG" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "mIqmq_qmWw78" + }, + "outputs": [], + "source": [ + "#@title Helper function for displaying examples\n", + "def plot(examples, predictions=None):\n", + " # Get the images, labels, and optionally predictions\n", + " images = examples['image']\n", + " labels = examples['label']\n", + " batch_size = len(images)\n", + " if predictions is None:\n", + " predictions = batch_size * [None]\n", + "\n", + " # Configure the layout of the grid\n", + " x = np.ceil(np.sqrt(batch_size))\n", + " y = np.ceil(batch_size / x)\n", + " fig = plt.figure(figsize=(x * 6, y * 7))\n", + "\n", + " for i, (image, label, prediction) in enumerate(zip(images, labels, predictions)):\n", + " # Render the image\n", + " ax = fig.add_subplot(x, y, i+1)\n", + " ax.imshow(image, aspect='auto')\n", + " ax.grid(False)\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + " # Display the label and optionally prediction\n", + " x_label = 'Label: ' + name_map[class_names[label]]\n", + " if prediction is not None:\n", + " x_label = 'Prediction: ' + name_map[class_names[prediction]] + '\\n' + x_label\n", + " ax.xaxis.label.set_color('green' if label == prediction else 'red')\n", + " ax.set_xlabel(x_label)\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kwrg9yIlaUSb" + }, + "source": [ + "## Dataset\n", + "\n", + "Let's load the *cassava* dataset from TFDS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0rTcnxoSkp31" + }, + "outputs": [], + "source": [ + "dataset, info = tfds.load('cassava', with_info=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GpC71TFDhJFO" + }, + "source": [ + "Let's take a look at the dataset info to learn more about it, like the description and citation and information about how many examples are available" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "btJBMovmbYtR" + }, + "outputs": [], + "source": [ + "info" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QT3XWAtR6BRy" + }, + "source": [ + "The *cassava* dataset has images of cassava leaves with 4 distinct diseases as well as healthy cassava leaves. The model can predict all of these classes as well as a sixth class for \"unknown\" when the model is not confident in its prediction." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9NT9q8yyXZfX" + }, + "outputs": [], + "source": [ + "# Extend the cassava dataset classes with 'unknown'\n", + "class_names = info.features['label'].names + ['unknown']\n", + "\n", + "# Map the class names to human readable names\n", + "name_map = dict(\n", + " cmd='Mosaic Disease',\n", + " cbb='Bacterial Blight',\n", + " cgm='Green Mite',\n", + " cbsd='Brown Streak Disease',\n", + " healthy='Healthy',\n", + " unknown='Unknown')\n", + "\n", + "print(len(class_names), 'classes:')\n", + "print(class_names)\n", + "print([name_map[name] for name in class_names])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I6y_MGxgiW09" + }, + "source": [ + "Before we can feed the data to the model, we need to do a bit of preprocessing. The model expects 224 x 224 images with RGB channel values in [0, 1]. Let's normalize and resize the images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UxtxvqRjh7Nm" + }, + "outputs": [], + "source": [ + "def preprocess_fn(data):\n", + " image = data['image']\n", + "\n", + " # Normalize [0, 255] to [0, 1]\n", + " image = tf.cast(image, tf.float32)\n", + " image = image / 255.\n", + "\n", + " # Resize the images to 224 x 224\n", + " image = tf.image.resize(image, (224, 224))\n", + "\n", + " data['image'] = image\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qz27YrZahdvn" + }, + "source": [ + "Let's take a look at a few examples from the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j6LkAxv3f-aJ" + }, + "outputs": [], + "source": [ + "batch = dataset['validation'].map(preprocess_fn).batch(25).as_numpy_iterator()\n", + "examples = next(batch)\n", + "plot(examples)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eHlEAhL3hq2R" + }, + "source": [ + "## Model\n", + "\n", + "Let's load the classifier from TF Hub, run it on a few examples, and take a look at its predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b6eIWkTjIQhS" + }, + "outputs": [], + "source": [ + "classifier = hub.KerasLayer('https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2')\n", + "probabilities = classifier(examples['image'])\n", + "predictions = tf.argmax(probabilities, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MTQA1YAltfRZ" + }, + "outputs": [], + "source": [ + "plot(examples, predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MuFE8A5aZv9z" + }, + "source": [ + "## Evaluation & robustness\n", + "\n", + "Let's measure the *accuracy* of our classifier on a split of the dataset. We can also look at the *robustness* of the model by evaluating its performance on a non-cassava dataset. For images from other plant datasets, like iNaturalist or beans, the model should almost always return *unknown*."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "0ERcNxs0kHd3" + }, + "outputs": [], + "source": [ + "#@title Parameters {run: \"auto\"}\n", + "\n", + "DATASET = 'cassava' #@param {type:\"string\"} ['cassava', 'beans', 'i_naturalist2017']\n", + "DATASET_SPLIT = 'test' #@param {type:\"string\"} ['train', 'test', 'validation']\n", + "BATCH_SIZE = 32 #@param {type:\"integer\"}\n", + "MAX_EXAMPLES = 1000 #@param {type:\"integer\"}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mt0-IVmZplbb" + }, + "outputs": [], + "source": [ + "def label_to_unknown_fn(data):\n", + " data['label'] = 5 # Override label to unknown.\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cQYvY3IvY2Nx" + }, + "outputs": [], + "source": [ + "# Preprocess the examples and map the image label to unknown for non-cassava datasets.\n", + "ds = tfds.load(DATASET, split=DATASET_SPLIT).map(preprocess_fn).take(MAX_EXAMPLES)\n", + "dataset_description = DATASET\n", + "if DATASET != 'cassava':\n", + " ds = ds.map(label_to_unknown_fn)\n", + " dataset_description += ' (labels mapped to unknown)'\n", + "ds = ds.batch(BATCH_SIZE)\n", + "\n", + "# Calculate the accuracy of the model\n", + "metric = tf.keras.metrics.Accuracy()\n", + "for examples in ds:\n", + " probabilities = classifier(examples['image'])\n", + " predictions = tf.math.argmax(probabilities, axis=-1)\n", + " labels = examples['label']\n", + " metric.update_state(labels, predictions)\n", + "\n", + "print('Accuracy on %s: %.2f' % (dataset_description, metric.result().numpy()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rvS18sBExpdL" + }, + "source": [ + "## Learn more\n", + "\n", + "* Learn more about the model on TensorFlow Hub: https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2\n", + "* Learn how to build a custom image classifier running on a mobile phone with [ML Kit](https://developers.google.com/ml-kit/custom-models#tfhub) with the [TensorFlow Lite version of this model](https://tfhub.dev/google/lite-model/cropnet/classifier/cassava_disease_V1/1)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "cropnet_cassava.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cropnet_on_device.ipynb b/site/en/hub/tutorials/cropnet_on_device.ipynb new file mode 100644 index 00000000000..0e1cb1e0b0d --- /dev/null +++ b/site/en/hub/tutorials/cropnet_on_device.ipynb @@ -0,0 +1,724 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "3XX46cTrh6iD" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors. \n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sKrlWr6Kh-mF" + }, + "outputs": [], + "source": [ + "#@title Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DMVmlJ0fAMkH" + }, + "source": [ + "# Fine tuning models for plant disease detection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hk5u_9KN1m-t" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OEHq-hV5sWYO" + }, + "source": [ + "This notebook shows you how to **fine-tune CropNet models from TensorFlow Hub** on a dataset from TFDS or your own crop disease detection dataset.\n", + "\n", + "You will:\n", + "- Load the TFDS cassava dataset or your own data\n", + "- Enrich the data with unknown (negative) examples to get a more robust model\n", + "- Apply image augmentations to the data\n", + "- Load and fine tune a [CropNet model](https://tfhub.dev/s?module-type=image-feature-vector&q=cropnet) from TF Hub\n", + "- Export a TFLite model, ready to be deployed on your app with [Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/image_classifier), [MLKit](https://developers.google.com/ml-kit/vision/image-labeling/custom-models/android) or [TFLite](https://www.tensorflow.org/lite/guide/inference) directly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dQvS4p807mZf" + }, + "source": [ + "## Imports and Dependencies\n", + "\n", + "Before starting, you'll need to install some of the dependencies that will be needed like [Model Maker](https://www.tensorflow.org/lite/guide/model_maker#installation) and the latest version of TensorFlow Datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5BDTEMtexXE3" + }, + "outputs": [], + "source": [ + "!sudo apt install -q libportaudio2\n", + "## image_classifier library requires numpy <= 1.23.5\n", + "!pip install \"numpy<=1.23.5\"\n", + "!pip install --use-deprecated=legacy-resolver tflite-model-maker-nightly\n", + "!pip install -U tensorflow-datasets\n", + "## scann library requires tensorflow < 2.9.0\n", + "!pip install \"tensorflow<2.9.0\"\n", + "!pip install \"tensorflow-datasets~=4.8.0\" # protobuf>=3.12.2\n", + "!pip install tensorflow-metadata~=1.10.0 # protobuf>=3.13\n", + "## tensorflowjs requires packaging < 20.10\n", + "!pip install \"packaging<20.10\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nekG9Iwgxbx0" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import os\n", + "import seaborn as sns\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "\n", + "from tensorflow_examples.lite.model_maker.core.export_format import ExportFormat\n", + "from tensorflow_examples.lite.model_maker.core.task import image_preprocessing\n", + "\n", + "from tflite_model_maker import image_classifier\n", + "from tflite_model_maker import ImageClassifierDataLoader\n", + "from tflite_model_maker.image_classifier import ModelSpec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fV0k2Q4x4N_4" + }, + "source": [ + "## Load a TFDS dataset to fine-tune on\n", + "\n", + "Lets use the publicly available [Cassava Leaf Disease dataset](https://www.tensorflow.org/datasets/catalog/cassava) from TFDS." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TTaD5W_1xjUz" + }, + "outputs": [], + "source": [ + "tfds_name = 'cassava'\n", + "(ds_train, ds_validation, ds_test), ds_info = tfds.load(\n", + " name=tfds_name,\n", + " split=['train', 'validation', 'test'],\n", + " with_info=True,\n", + " as_supervised=True)\n", + "TFLITE_NAME_PREFIX = tfds_name" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xDuDGUAxyHtA" + }, + "source": [ + "## Or alternatively load your own data to fine-tune on\n", + "\n", + "Instead of using a TFDS dataset, you can also train on your own data. This code snippet shows how to load your own custom dataset. See [this](https://www.tensorflow.org/datasets/api_docs/python/tfds/folder_dataset/ImageFolder) link for the supported structure of the data. An example is provided here using the publicly available [Cassava Leaf Disease dataset](https://www.tensorflow.org/datasets/catalog/cassava)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k003tLvflHpC" + }, + "outputs": [], + "source": [ + "# data_root_dir = tf.keras.utils.get_file(\n", + "# 'cassavaleafdata.zip',\n", + "# 'https://storage.googleapis.com/emcassavadata/cassavaleafdata.zip',\n", + "# extract=True)\n", + "# data_root_dir = os.path.splitext(data_root_dir)[0] # Remove the .zip extension\n", + "\n", + "# builder = tfds.ImageFolder(data_root_dir)\n", + "\n", + "# ds_info = builder.info\n", + "# ds_train = builder.as_dataset(split='train', as_supervised=True)\n", + "# ds_validation = builder.as_dataset(split='validation', as_supervised=True)\n", + "# ds_test = builder.as_dataset(split='test', as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hs3XCVLo4Fa1" + }, + "source": [ + "## Visualize samples from train split\n", + "\n", + "Let's take a look at some examples from the dataset including the class id and the class name for the image samples and their labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "89GkD60Eyfe0" + }, + "outputs": [], + "source": [ + "_ = tfds.show_examples(ds_train, ds_info)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-KW-n0lV4AZ-" + }, + "source": [ + "## Add images to be used as Unknown examples from TFDS datasets\n", + "\n", + "Add additional unknown (negative) examples to the training dataset and assign a new unknown class label number to them. The goal is to have a model that, when used in practice (e.g. in the field), has the option of predicting \"Unknown\" when it sees something unexpected.\n", + "\n", + "Below you can see a list of datasets that will be used to sample the additional unknown imagery. It includes 3 completely different datasets to increase diversity. 
One of them is a beans leaf disease dataset, so that the model has exposure to diseased plants other than cassava.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SYDMjRhDkDnd" + }, + "outputs": [], + "source": [ + "UNKNOWN_TFDS_DATASETS = [{\n", + " 'tfds_name': 'imagenet_v2/matched-frequency',\n", + " 'train_split': 'test[:80%]',\n", + " 'test_split': 'test[80%:]',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}, {\n", + " 'tfds_name': 'oxford_flowers102',\n", + " 'train_split': 'train',\n", + " 'test_split': 'test',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}, {\n", + " 'tfds_name': 'beans',\n", + " 'train_split': 'train',\n", + " 'test_split': 'test',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XUM_d0evktGi" + }, + "source": [ + "The UNKNOWN datasets are also loaded from TFDS." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5DdWgBTe8uKR" + }, + "outputs": [], + "source": [ + "# Load unknown datasets.\n", + "weights = [\n", + " spec['num_examples_ratio_to_normal'] for spec in UNKNOWN_TFDS_DATASETS\n", + "]\n", + "num_unknown_train_examples = sum(\n", + " int(w * ds_train.cardinality().numpy()) for w in weights)\n", + "ds_unknown_train = tf.data.Dataset.sample_from_datasets([\n", + " tfds.load(\n", + " name=spec['tfds_name'], split=spec['train_split'],\n", + " as_supervised=True).repeat(-1) for spec in UNKNOWN_TFDS_DATASETS\n", + "], weights).take(num_unknown_train_examples)\n", + "ds_unknown_train = ds_unknown_train.apply(\n", + " tf.data.experimental.assert_cardinality(num_unknown_train_examples))\n", + "ds_unknown_tests = [\n", + " tfds.load(\n", + " name=spec['tfds_name'], split=spec['test_split'], as_supervised=True)\n", + " for spec in UNKNOWN_TFDS_DATASETS\n", + "]\n", + "ds_unknown_test = ds_unknown_tests[0]\n", + "for ds in ds_unknown_tests[1:]:\n", + " ds_unknown_test = ds_unknown_test.concatenate(ds)\n", + "\n", + "# All examples from the unknown datasets will get a new class label number.\n", + "num_normal_classes = len(ds_info.features['label'].names)\n", + "unknown_label_value = tf.convert_to_tensor(num_normal_classes, tf.int64)\n", + "ds_unknown_train = ds_unknown_train.map(lambda image, _:\n", + " (image, unknown_label_value))\n", + "ds_unknown_test = ds_unknown_test.map(lambda image, _:\n", + " (image, unknown_label_value))\n", + "\n", + "# Merge the normal train dataset with the unknown train dataset.\n", + "weights = [\n", + " ds_train.cardinality().numpy(),\n", + " ds_unknown_train.cardinality().numpy()\n", + "]\n", + "ds_train_with_unknown = tf.data.Dataset.sample_from_datasets(\n", + " [ds_train, ds_unknown_train], [float(w) for w in weights])\n", + "ds_train_with_unknown = ds_train_with_unknown.apply(\n", + " tf.data.experimental.assert_cardinality(sum(weights)))\n", + "\n", + "print((f\"Added {ds_unknown_train.cardinality().numpy()} negative examples.\"\n", + " f\"Training dataset has now {ds_train_with_unknown.cardinality().numpy()}\"\n", + " ' examples in total.'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "am6eKbzt7raH" + }, + "source": [ + "## Apply augmentations" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sxIUP0Flk35V" + }, + "source": [ + "For all the images, to make them more diverse, you'll apply some augmentation, like changes in:\n", + "- Brightness\n", + "- Contrast\n", + "- Saturation\n", + "- Hue\n", + "- Crop\n", + "\n", + "These 
types of augmentations help make the model more robust to variations in image inputs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_BiOkXjqRju" + }, + "outputs": [], + "source": [ + "def random_crop_and_random_augmentations_fn(image):\n", + " # preprocess_for_train does random crop and resize internally.\n", + " image = image_preprocessing.preprocess_for_train(image)\n", + " image = tf.image.random_brightness(image, 0.2)\n", + " image = tf.image.random_contrast(image, 0.5, 2.0)\n", + " image = tf.image.random_saturation(image, 0.75, 1.25)\n", + " image = tf.image.random_hue(image, 0.1)\n", + " return image\n", + "\n", + "\n", + "def random_crop_fn(image):\n", + " # preprocess_for_train does random crop and resize internally.\n", + " image = image_preprocessing.preprocess_for_train(image)\n", + " return image\n", + "\n", + "\n", + "def resize_and_center_crop_fn(image):\n", + " image = tf.image.resize(image, (256, 256))\n", + " image = image[16:240, 16:240]\n", + " return image\n", + "\n", + "\n", + "no_augment_fn = lambda image: image\n", + "\n", + "train_augment_fn = lambda image, label: (\n", + " random_crop_and_random_augmentations_fn(image), label)\n", + "eval_augment_fn = lambda image, label: (resize_and_center_crop_fn(image), label)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RUfqE1c3l6my" + }, + "source": [ + "To apply the augmentations, use the `map` method of the Dataset class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uq-NCtaH_h8j" + }, + "outputs": [], + "source": [ + "ds_train_with_unknown = ds_train_with_unknown.map(train_augment_fn)\n", + "ds_validation = ds_validation.map(eval_augment_fn)\n", + "ds_test = ds_test.map(eval_augment_fn)\n", + "ds_unknown_test = ds_unknown_test.map(eval_augment_fn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DvnwolLiCqYX" + }, + "source": [ + "## Wrap the data into a Model Maker friendly format\n", + "\n", + "To use these datasets with Model Maker, they need to be wrapped in an ImageClassifierDataLoader class."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OXPWEDFDRlVu" + }, + "outputs": [], + "source": [ + "label_names = ds_info.features['label'].names + ['UNKNOWN']\n", + "\n", + "train_data = ImageClassifierDataLoader(ds_train_with_unknown,\n", + " ds_train_with_unknown.cardinality(),\n", + " label_names)\n", + "validation_data = ImageClassifierDataLoader(ds_validation,\n", + " ds_validation.cardinality(),\n", + " label_names)\n", + "test_data = ImageClassifierDataLoader(ds_test, ds_test.cardinality(),\n", + " label_names)\n", + "unknown_test_data = ImageClassifierDataLoader(ds_unknown_test,\n", + " ds_unknown_test.cardinality(),\n", + " label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j2iDwq2Njpb_" + }, + "source": [ + "## Run training\n", + "\n", + "[TensorFlow Hub](https://tfhub.dev) has multiple models available for transfer learning.\n", + "\n", + "Here you can choose one, and you can keep experimenting with other ones to try to get better results.\n", + "\n", + "If you want even more models to try, you can add them from this [collection](https://tfhub.dev/google/collections/image/1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "5UhNpR0Ex_5-" + }, + "outputs": [], + "source": [ + "#@title Choose a base model\n", + "\n", + "model_name = 'mobilenet_v3_large_100_224' #@param ['cropnet_cassava', 'cropnet_concat', 'cropnet_imagenet', 'mobilenet_v3_large_100_224']\n", + "\n", + "map_model_name = {\n", + " 'cropnet_cassava':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1',\n", + " 'cropnet_concat':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/concat/1',\n", + " 'cropnet_imagenet':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/imagenet/1',\n", + " 'mobilenet_v3_large_100_224':\n", + " 'https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5',\n", + "}\n", + "\n", + "model_handle = map_model_name[model_name]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y1ecXlQgR5Uk" + }, + "source": [ + "To fine-tune the model, you will use Model Maker. It makes the overall solution easier because, after training the model, it also converts it to TFLite.\n", + "\n", + "Model Maker makes this conversion as good as possible and includes all the information necessary to easily deploy the model on-device later.\n", + "\n", + "The model spec is how you tell Model Maker which base model you'd like to use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L8P-VTqJ8GaF" + }, + "outputs": [], + "source": [ + "image_model_spec = ModelSpec(uri=model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AnWN3kk6jCHf" + }, + "source": [ + "One important detail here is setting `train_whole_model`, which fine-tunes the base model during training. This makes the process slower, but the final model has higher accuracy. Setting `shuffle` makes sure the model sees the data in a randomly shuffled order, which is a best practice for model learning."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KRbSDbnA6Xap" + }, + "outputs": [], + "source": [ + "model = image_classifier.create(\n", + " train_data,\n", + " model_spec=image_model_spec,\n", + " batch_size=128,\n", + " learning_rate=0.03,\n", + " epochs=5,\n", + " shuffle=True,\n", + " train_whole_model=True,\n", + " validation_data=validation_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "buFDW0izBqIQ" + }, + "source": [ + "## Evaluate model on test split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OYIZ1rlV7lxm" + }, + "outputs": [], + "source": [ + "model.evaluate(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YJaReZ_OVU71" + }, + "source": [ + "To have an even better understanding of the fine tuned model, it's good to analyse the confusion matrix. This will show how often one class is predicted as another." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o9_vs1nNKOLF" + }, + "outputs": [], + "source": [ + "def predict_class_label_number(dataset):\n", + " \"\"\"Runs inference and returns predictions as class label numbers.\"\"\"\n", + " rev_label_names = {l: i for i, l in enumerate(label_names)}\n", + " return [\n", + " rev_label_names[o[0][0]]\n", + " for o in model.predict_top_k(dataset, batch_size=128)\n", + " ]\n", + "\n", + "def show_confusion_matrix(cm, labels):\n", + " plt.figure(figsize=(10, 8))\n", + " sns.heatmap(cm, xticklabels=labels, yticklabels=labels, \n", + " annot=True, fmt='g')\n", + " plt.xlabel('Prediction')\n", + " plt.ylabel('Label')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7BWZCKerCNF_" + }, + "outputs": [], + "source": [ + "confusion_mtx = tf.math.confusion_matrix(\n", + " list(ds_test.map(lambda x, y: y)),\n", + " predict_class_label_number(test_data),\n", + " num_classes=len(label_names))\n", + "\n", + "show_confusion_matrix(confusion_mtx, label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ksu9BFULBvmj" + }, + "source": [ + "## Evaluate model on unknown test data\n", + "\n", + "In this evaluation we expect the model to have accuracy of almost 1. All images the model is tested on are not related to the normal dataset and hence we expect the model to predict the \"Unknown\" class label." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5wvZwliZcJP" + }, + "outputs": [], + "source": [ + "model.evaluate(unknown_test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jm47Odo5Vaiq" + }, + "source": [ + "Print the confusion matrix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E_gEX3oWH1YT" + }, + "outputs": [], + "source": [ + "unknown_confusion_mtx = tf.math.confusion_matrix(\n", + " list(ds_unknown_test.map(lambda x, y: y)),\n", + " predict_class_label_number(unknown_test_data),\n", + " num_classes=len(label_names))\n", + "\n", + "show_confusion_matrix(unknown_confusion_mtx, label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o2agDx2fCHyd" + }, + "source": [ + "## Export the model as TFLite and SavedModel\n", + "\n", + "Now we can export the trained models in TFLite and SavedModel formats for deploying on-device and using for inference in TensorFlow." 
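, + "\n", + "As a quick sanity check, here is a minimal sketch of how the exported TFLite file could be loaded back with the TF Lite interpreter. This assumes the export cell below has already been run, so that `tflite_filename` points at the exported file, and it simply feeds a dummy input whose shape and dtype are read from the model itself:\n", + "\n", + "```python\n", + "import numpy as np\n", + "\n", + "# Load the exported model and allocate its tensors.\n", + "interpreter = tf.lite.Interpreter(model_path=tflite_filename)\n", + "interpreter.allocate_tensors()\n", + "input_details = interpreter.get_input_details()[0]\n", + "output_details = interpreter.get_output_details()[0]\n", + "\n", + "# Feed a dummy input that matches the model's expected shape and dtype.\n", + "dummy_input = np.zeros(input_details['shape'], dtype=input_details['dtype'])\n", + "interpreter.set_tensor(input_details['index'], dummy_input)\n", + "interpreter.invoke()\n", + "\n", + "# One score per class, in the same order as label_names.\n", + "scores = interpreter.get_tensor(output_details['index'])[0]\n", + "print('Predicted class:', label_names[int(np.argmax(scores))])\n", + "```"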
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bAFvBmMr7owW" + }, + "outputs": [], + "source": [ + "tflite_filename = f'{TFLITE_NAME_PREFIX}_model_{model_name}.tflite'\n", + "model.export(export_dir='.', tflite_filename=tflite_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Pz0-6To2C4yM" + }, + "outputs": [], + "source": [ + "# Export saved model version.\n", + "model.export(export_dir='.', export_format=ExportFormat.SAVED_MODEL)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4V4GdQqxjEU7" + }, + "source": [ + "## Next steps\n", + "\n", + "The model that you've just trained can be used on mobile devices and even deployed in the field!\n", + "\n", + "**To download the model, click the folder icon for the Files menu on the left side of the colab, and choose the download option.**\n", + "\n", + "The same technique used here could be applied to other plant diseases tasks that might be more suitable for your use case or any other type of image classification task. If you want to follow up and deploy on an Android app, you can continue on this [Android quickstart guide](https://www.tensorflow.org/lite/android/quickstart)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "3XX46cTrh6iD", + "xDuDGUAxyHtA" + ], + "name": "cropnet_on_device.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb b/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb new file mode 100644 index 00000000000..920d197811e --- /dev/null +++ b/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb @@ -0,0 +1,4463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "**Copyright 2019 The TensorFlow Hub Authors.**\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "co7MV6sX7Xto" + }, + "source": [ + "# Cross-Lingual Similarity and Semantic Search Engine with Multilingual Universal Sentence Encoder\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eAVQGidpL8v5" + }, + "source": [ + "This notebook illustrates how to access the Multilingual Universal Sentence Encoder module and use it for sentence similarity across multiple languages. This module is an extension of the [original Universal Encoder module](https://tfhub.dev/google/universal-sentence-encoder/2).\n", + "\n", + "The notebook is divided as follows:\n", + "\n", + "* The first section shows a visualization of sentences between pair of languages. This is a more academic exercise. \n", + "* In the second section, we show how to build a semantic search engine from a sample of a Wikipedia corpus in multiple languages." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UvNRbHGarYeR" + }, + "source": [ + "## Citation\n", + "\n", + "*Research papers that make use of the models explored in this colab should cite:*\n", + "\n", + "### [Multilingual universal sentence encoder for semantic retrieval](https://arxiv.org/abs/1907.04307)\n", + "Yinfei Yang, Daniel Cer, Amin Ahmad, Mandy Guo, Jax Law, Noah Constant, Gustavo Hernandez Abrego, Steve Yuan, Chris Tar, Yun-Hsuan Sung, Brian Strope, and Ray Kurzweil. 2019.\n", + " arXiv preprint arXiv:1907.04307" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pOTzp8O36CyQ" + }, + "source": [ + "## Setup\n", + "\n", + "This section sets up the environment for access to the Multilingual Universal Sentence Encoder Module and also prepares a set of English sentences and their translations. In the following sections, the multilingual module will be used to compute similarity *across languages*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "lVjNK8shFKOC" + }, + "outputs": [], + "source": [ + "%%capture\n", + "#@title Setup Environment\n", + "# Install the latest Tensorflow version.\n", + "!pip install \"tensorflow-text==2.11.*\"\n", + "!pip install bokeh\n", + "!pip install simpleneighbors[annoy]\n", + "!pip install tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "MSeY-MUQo2Ha" + }, + "outputs": [], + "source": [ + "#@title Setup common imports and functions\n", + "import bokeh\n", + "import bokeh.models\n", + "import bokeh.plotting\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_text import SentencepieceTokenizer\n", + "import sklearn.metrics.pairwise\n", + "\n", + "from simpleneighbors import SimpleNeighbors\n", + "from tqdm import tqdm\n", + "from tqdm import trange\n", + "\n", + "def visualize_similarity(embeddings_1, embeddings_2, labels_1, labels_2,\n", + " plot_title,\n", + " plot_width=1200, plot_height=600,\n", + " xaxis_font_size='12pt', yaxis_font_size='12pt'):\n", + "\n", + " assert len(embeddings_1) == len(labels_1)\n", + " assert len(embeddings_2) == len(labels_2)\n", + "\n", + " # arccos based text similarity (Yang et al. 2019; Cer et al. 
2019)\n", + " sim = 1 - np.arccos(\n", + " sklearn.metrics.pairwise.cosine_similarity(embeddings_1,\n", + " embeddings_2))/np.pi\n", + "\n", + " embeddings_1_col, embeddings_2_col, sim_col = [], [], []\n", + " for i in range(len(embeddings_1)):\n", + " for j in range(len(embeddings_2)):\n", + " embeddings_1_col.append(labels_1[i])\n", + " embeddings_2_col.append(labels_2[j])\n", + " sim_col.append(sim[i][j])\n", + " df = pd.DataFrame(zip(embeddings_1_col, embeddings_2_col, sim_col),\n", + " columns=['embeddings_1', 'embeddings_2', 'sim'])\n", + "\n", + " mapper = bokeh.models.LinearColorMapper(\n", + " palette=[*reversed(bokeh.palettes.YlOrRd[9])], low=df.sim.min(),\n", + " high=df.sim.max())\n", + "\n", + " p = bokeh.plotting.figure(title=plot_title, x_range=labels_1,\n", + " x_axis_location=\"above\",\n", + " y_range=[*reversed(labels_2)],\n", + " plot_width=plot_width, plot_height=plot_height,\n", + " tools=\"save\",toolbar_location='below', tooltips=[\n", + " ('pair', '@embeddings_1 ||| @embeddings_2'),\n", + " ('sim', '@sim')])\n", + " p.rect(x=\"embeddings_1\", y=\"embeddings_2\", width=1, height=1, source=df,\n", + " fill_color={'field': 'sim', 'transform': mapper}, line_color=None)\n", + "\n", + " p.title.text_font_size = '12pt'\n", + " p.axis.axis_line_color = None\n", + " p.axis.major_tick_line_color = None\n", + " p.axis.major_label_standoff = 16\n", + " p.xaxis.major_label_text_font_size = xaxis_font_size\n", + " p.xaxis.major_label_orientation = 0.25 * np.pi\n", + " p.yaxis.major_label_text_font_size = yaxis_font_size\n", + " p.min_border_right = 300\n", + "\n", + " bokeh.io.output_notebook()\n", + " bokeh.io.show(p)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gk2IRjZFGDsK" + }, + "source": [ + "This is additional boilerplate code where we import the pre-trained ML model we will use to encode text throughout this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mkmF3w8WGLcM" + }, + "outputs": [], + "source": [ + "# The 16-language multilingual module is the default but feel free\n", + "# to pick others from the list and compare the results.\n", + "module_url = 'https://tfhub.dev/google/universal-sentence-encoder-multilingual/3' #@param ['https://tfhub.dev/google/universal-sentence-encoder-multilingual/3', 'https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3']\n", + "\n", + "model = hub.load(module_url)\n", + "\n", + "def embed_text(input):\n", + " return model(input)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jhLPq6AROyFk" + }, + "source": [ + "# Visualize Text Similarity Between Languages\n", + "With the sentence embeddings now in hand, we can visualize semantic similarity across different languages." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8xdAogbxJDTD" + }, + "source": [ + "## Computing Text Embeddings\n", + "\n", + "We first define a set of sentences translated to various languages in parallel. Then, we precompute the embeddings for all of our sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q8F4LNGFqOiq" + }, + "outputs": [], + "source": [ + "# Some texts of different lengths in different languages.\n", + "arabic_sentences = ['كلب', 'الجراء لطيفة.', 'أستمتع بالمشي لمسافات طويلة على طول الشاطئ مع كلبي.']\n", + "chinese_sentences = ['狗', '小狗很好。', '我喜欢和我的狗一起沿着海滩散步。']\n", + "english_sentences = ['dog', 'Puppies are nice.', 'I enjoy taking long walks along the beach with my dog.']\n", + "french_sentences = ['chien', 'Les chiots sont gentils.', 'J\\'aime faire de longues promenades sur la plage avec mon chien.']\n", + "german_sentences = ['Hund', 'Welpen sind nett.', 'Ich genieße lange Spaziergänge am Strand entlang mit meinem Hund.']\n", + "italian_sentences = ['cane', 'I cuccioli sono carini.', 'Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.']\n", + "japanese_sentences = ['犬', '子犬はいいです', '私は犬と一緒にビーチを散歩するのが好きです']\n", + "korean_sentences = ['개', '강아지가 좋다.', '나는 나의 개와 해변을 따라 길게 산책하는 것을 즐긴다.']\n", + "russian_sentences = ['собака', 'Милые щенки.', 'Мне нравится подолгу гулять по пляжу со своей собакой.']\n", + "spanish_sentences = ['perro', 'Los cachorros son agradables.', 'Disfruto de dar largos paseos por la playa con mi perro.']\n", + "\n", + "# Multilingual example\n", + "multilingual_example = [\"Willkommen zu einfachen, aber\", \"verrassend krachtige\", \"multilingüe\", \"compréhension du language naturel\", \"модели.\", \"大家是什么意思\" , \"보다 중요한\", \".اللغة التي يتحدثونها\"]\n", + "multilingual_example_in_en = [\"Welcome to simple yet\", \"surprisingly powerful\", \"multilingual\", \"natural language understanding\", \"models.\", \"What people mean\", \"matters more than\", \"the language they speak.\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "weXZqLtTJY9b" + }, + "outputs": [], + "source": [ + "# Compute embeddings.\n", + "ar_result = embed_text(arabic_sentences)\n", + "en_result = embed_text(english_sentences)\n", + "es_result = embed_text(spanish_sentences)\n", + "de_result = embed_text(german_sentences)\n", + "fr_result = embed_text(french_sentences)\n", + "it_result = embed_text(italian_sentences)\n", + "ja_result = embed_text(japanese_sentences)\n", + "ko_result = embed_text(korean_sentences)\n", + "ru_result = embed_text(russian_sentences)\n", + "zh_result = embed_text(chinese_sentences)\n", + "\n", + "multilingual_result = embed_text(multilingual_example)\n", + "multilingual_in_en_result = embed_text(multilingual_example_in_en)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_3zGWuF-GhUm" + }, + "source": [ + "## Visualizing Similarity\n", + "\n", + "With text embeddings in hand, we can take their dot-product to visualize how similar sentences are between languages. A darker color indicates the embeddings are semantically similar." 
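+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "To make the colors concrete, the cell below is a minimal illustrative sketch that reuses the imports and embeddings computed above: it evaluates the same arccos-based score that `visualize_similarity` plots, so you can inspect the raw numbers for one language pair (English-Spanish here).\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Minimal sketch: the angular similarity behind the heatmaps below.\n",
+        "# Scores lie in [0, 1]; values near 1 mean the sentence embeddings are nearly parallel.\n",
+        "# Clipping guards against tiny floating-point overshoot in the cosine values.\n",
+        "cos = sklearn.metrics.pairwise.cosine_similarity(en_result, es_result)\n",
+        "angular_sim = 1 - np.arccos(np.clip(cos, -1.0, 1.0)) / np.pi\n",
+        "print(np.round(angular_sim, 2))"
+      ]
+    },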
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WOEIJA0mh70g"
+      },
+      "source": [
+        "### Multilingual Similarity"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "R2hbCMhmiDWR"
+      },
+      "outputs": [],
+      "source": [
+        "visualize_similarity(multilingual_in_en_result, multilingual_result,\n",
+        "                     multilingual_example_in_en, multilingual_example, \"Multilingual Universal Sentence Encoder for Semantic Retrieval (Yang et al., 2019)\")\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "h3TEhllsq3ax"
+      },
+      "source": [
+        "### English-Arabic Similarity"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Q9UDpStmq7Ii"
+      },
+      "outputs": [],
+      "source": [
+        "visualize_similarity(en_result, ar_result, english_sentences, arabic_sentences, 'English-Arabic Similarity')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "QF9z48HMp4WL"
+      },
+      "source": [
+        "### English-Russian Similarity"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "QE68UejYp86z"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/javascript": [
+              "\n",
+              "(function(root) {\n",
+              "  function now() {\n",
+              "    return new Date();\n",
+              "  }\n",
+              "\n",
+              "  var force = true;\n",
+              "\n",
+              "  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n",
+              "    root._bokeh_onload_callbacks = [];\n",
+              "    root._bokeh_is_loading = undefined;\n",
+              "  }\n",
+              "\n",
+              "  var JS_MIME_TYPE = 
'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return 
toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"c09fc8f6-d5fa-4ba4-ae4a-0ce276d613ba\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1180\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1183\",\"type\":\"Grid\"},{\"id\":\"1187\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1184\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1197\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1170\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1190\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1172\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1176\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1174\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1178\",\"type\":\"CategoricalScale\"}},\"id\":\"1169\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1189\",\"type\":\"HoverTool\"},{\"attributes\":{},\"id\":\"1218\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1216\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{},\"id\":\"1219\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"ticker\":{\"id\":\"1181\",\"type\":\"CategoricalTicker\"}},\"id\":\"1183\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1216\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1181\",\"type\":\"CategoricalTicker\"}},\"id\":\"1180\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1178\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1176\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 \\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 
\\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\"]},\"id\":\"1174\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1185\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1214\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1185\",\"type\":\"CategoricalTicker\"}},\"id\":\"1184\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1168\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1195\",\"type\":\"Rect\"},{\"attributes\":{\"high\":0.8845847249031067,\"low\":0.5800867676734924,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1168\",\"type\":\"LinearColorMapper\"},{\"attributes\":{},\"id\":\"1181\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"data_source\":{\"id\":\"1193\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1195\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1196\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1198\",\"type\":\"CDSView\"}},\"id\":\"1197\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1188\",\"type\":\"SaveTool\"},{\"id\":\"1189\",\"type\":\"HoverTool\"}]},\"id\":\"1190\",\"type\":\"Toolbar\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1172\",\"type\":\"FactorRange\"},{\"attributes\":{\"text\":\"English-Russian Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1170\",\"type\":\"Title\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1185\",\"type\":\"CategoricalTicker\"}},\"id\":\"1187\",\"type\":\"Grid\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 \\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 \\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\",\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 \\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 
\\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\",\"\\u0441\\u043e\\u0431\\u0430\\u043a\\u0430\",\"\\u041c\\u0438\\u043b\\u044b\\u0435 \\u0449\\u0435\\u043d\\u043a\\u0438.\",\"\\u041c\\u043d\\u0435 \\u043d\\u0440\\u0430\\u0432\\u0438\\u0442\\u0441\\u044f \\u043f\\u043e\\u0434\\u043e\\u043b\\u0433\\u0443 \\u0433\\u0443\\u043b\\u044f\\u0442\\u044c \\u043f\\u043e \\u043f\\u043b\\u044f\\u0436\\u0443 \\u0441\\u043e \\u0441\\u0432\\u043e\\u0435\\u0439 \\u0441\\u043e\\u0431\\u0430\\u043a\\u043e\\u0439.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAoIRO7D8AAABgiPvkPwAAAMCko+M/AAAAgDBm5D8AAADAi1DoPwAAAKCr8eI/AAAAIBKQ4j8AAABARq3iPwAAAMAlV+g/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1218\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1219\",\"type\":\"UnionRenderers\"}},\"id\":\"1193\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"1214\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1196\",\"type\":\"Rect\"},{\"attributes\":{\"source\":{\"id\":\"1193\",\"type\":\"ColumnDataSource\"}},\"id\":\"1198\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1188\",\"type\":\"SaveTool\"}],\"root_ids\":[\"1169\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"c09fc8f6-d5fa-4ba4-ae4a-0ce276d613ba\",\"roots\":{\"1169\":\"02b6f17f-c980-4d2c-b9d7-58e2d001b1bf\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1169" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, ru_result, english_sentences, russian_sentences, 'English-Russian Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BJkL6Az0QXNN" + }, + "source": [ + "### English-Spanish Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CH_BXVGhQ0GL" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " 
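+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The heatmaps above are drawn from a pairwise similarity matrix between the two sets of sentence embeddings. As a minimal sketch (assuming, as the plots suggest, that similarity is the inner product of the embedding matrices such as `en_result` and `ru_result` computed earlier), that matrix could be built directly:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "# Hypothetical illustration: pairwise similarity between the English and Russian\n",
+    "# sentence embeddings computed earlier in this notebook.\n",
+    "sim_matrix = np.inner(np.asarray(en_result), np.asarray(ru_result))\n",
+    "print(sim_matrix.shape)  # (len(english_sentences), len(russian_sentences))"
+   ]
+  },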
*/\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added 
*/\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"bb848e12-e360-4876-aa19-21896caab34d\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1274\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1277\",\"type\":\"Grid\"},{\"id\":\"1281\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1278\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1291\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1264\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1284\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1266\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1270\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1268\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1272\",\"type\":\"CategoricalScale\"}},\"id\":\"1263\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1272\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1270\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Disfruto de dar largos paseos por la playa con mi perro.\",\"Los cachorros son agradables.\",\"perro\"]},\"id\":\"1268\",\"type\":\"FactorRange\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1266\",\"type\":\"FactorRange\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1262\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1289\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAA4AIT7T8AAAAAcHfkPwAAAKAngeI/AAAAQNoA5D8AAADAvfvoPwAAAGCGFeI/AAAAgMNr4j8AAAAArbPjPwAAAGCJ1eo/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1319\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1320\",\"type\":\"UnionRenderers\"}},\"id\":\"1287\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"high\":0.9085707068443298,\"low\":0.5651275515556335,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1262\",\"type\":\"LinearColorMapper\"},{\"attributes\":{\"source\":{\"id\":\"1287\",\"type\":\"ColumnDataSource\"}},\"id\":\"1292\",\"type\":\"CDSView\"},{\"attributes\":{\"text\":\"English-Spanish Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1264\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| 
@embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1283\",\"type\":\"HoverTool\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1290\",\"type\":\"Rect\"},{\"attributes\":{\"ticker\":{\"id\":\"1275\",\"type\":\"CategoricalTicker\"}},\"id\":\"1277\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1282\",\"type\":\"SaveTool\"},{\"attributes\":{\"data_source\":{\"id\":\"1287\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1289\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1290\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1292\",\"type\":\"CDSView\"}},\"id\":\"1291\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1279\",\"type\":\"CategoricalTicker\"}},\"id\":\"1281\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1315\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1282\",\"type\":\"SaveTool\"},{\"id\":\"1283\",\"type\":\"HoverTool\"}]},\"id\":\"1284\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1320\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1279\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1315\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1279\",\"type\":\"CategoricalTicker\"}},\"id\":\"1278\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1319\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1317\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1317\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1275\",\"type\":\"CategoricalTicker\"}},\"id\":\"1274\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1275\",\"type\":\"CategoricalTicker\"}],\"root_ids\":[\"1263\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"bb848e12-e360-4876-aa19-21896caab34d\",\"roots\":{\"1263\":\"81e993c9-fc6b-4169-8c6b-a0101097b959\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1263" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + 
"source": [ + "visualize_similarity(en_result, es_result, english_sentences, spanish_sentences, 'English-Spanish Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "imn28LCiQO7d" + }, + "source": [ + "### English-Italian Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X9uD3DirPIGd" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 
1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"85d1d6b6-c6cd-4e71-b29b-2cc49ada74c5\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1375\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1378\",\"type\":\"Grid\"},{\"id\":\"1382\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1379\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1392\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1365\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1385\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1367\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1371\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1369\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1373\",\"type\":\"CategoricalScale\"}},\"id\":\"1364\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1371\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1427\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1373\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1423\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1425\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1376\",\"type\":\"CategoricalTicker\"}},\"id\":\"1375\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"I cuccioli sono carini.\",\"cane\"]},\"id\":\"1369\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1376\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1425\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1376\",\"type\":\"CategoricalTicker\"}},\"id\":\"1378\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1423\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1380\",\"type\":\"CategoricalTicker\"}},\"id\":\"1379\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1380\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio 
cane.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAoHcI7T8AAADAU2/jPwAAAIBLIeM/AAAAAO8N5D8AAAAA5GToPwAAAIDhjeI/AAAAQLlt4j8AAAAAEj3iPwAAAGCPHuw/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1427\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1428\",\"type\":\"UnionRenderers\"}},\"id\":\"1388\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1380\",\"type\":\"CategoricalTicker\"}},\"id\":\"1382\",\"type\":\"Grid\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1383\",\"type\":\"SaveTool\"},{\"id\":\"1384\",\"type\":\"HoverTool\"}]},\"id\":\"1385\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1383\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1384\",\"type\":\"HoverTool\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1391\",\"type\":\"Rect\"},{\"attributes\":{\"text\":\"English-Italian Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1365\",\"type\":\"Title\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1363\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1390\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1367\",\"type\":\"FactorRange\"},{\"attributes\":{\"data_source\":{\"id\":\"1388\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1390\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1391\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1393\",\"type\":\"CDSView\"}},\"id\":\"1392\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"1388\",\"type\":\"ColumnDataSource\"}},\"id\":\"1393\",\"type\":\"CDSView\"},{\"attributes\":{\"high\":0.90728360414505,\"low\":0.5699548721313477,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1363\",\"type\":\"LinearColorMapper\"},{\"attributes\":{},\"id\":\"1428\",\"type\":\"UnionRenderers\"}],\"root_ids\":[\"1364\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"85d1d6b6-c6cd-4e71-b29b-2cc49ada74c5\",\"roots\":{\"1364\":\"5e20475c-62a7-4a19-87ed-a605dc444c96\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", 
+ " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1364" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, it_result, english_sentences, italian_sentences, 'English-Italian Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m6ySvEGbQaTM" + }, + "source": [ + "### Italian-Spanish Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "irfwIeitQ7V6" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = 
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "m6ySvEGbQaTM"
+    },
+    "source": [
+     "### Italian-Spanish Similarity"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "id": "irfwIeitQ7V6"
+    },
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "[BokehJS loader boilerplate and embedded plot JSON trimmed: interactive heatmap titled 'Italian-Spanish Similarity', plotting the three Italian sentences against their Spanish translations.]"
+       ]
+      },
+      "metadata": {
+       "tags": []
+      },
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "visualize_similarity(it_result, es_result, italian_sentences, spanish_sentences, 'Italian-Spanish Similarity')"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "ueoRO8balwwr"
+    },
+    "source": [
+     "### English-Chinese Similarity"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {
+     "id": "xA7anofVlxL7"
+    },
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "[BokehJS loader boilerplate and embedded plot JSON trimmed: interactive heatmap titled 'English-Chinese Similarity', plotting the three English sentences against their Chinese translations.]"
+       ]
+      },
+      "metadata": {
+       "tags": []
+      },
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "visualize_similarity(en_result, zh_result, english_sentences, chinese_sentences, 'English-Chinese Similarity')"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "8zV1BJc3mL3W"
+    },
+    "source": [
+     "### English-Korean Similarity"
+    ]
+   },
output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"1bbe715c-608d-49a6-8927-e818fa752480\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1720\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1723\",\"type\":\"Grid\"},{\"id\":\"1727\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1724\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1737\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1710\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1730\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1712\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1716\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1714\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1718\",\"type\":\"CategoricalScale\"}},\"id\":\"1709\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1794\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1725\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1789\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1725\",\"type\":\"CategoricalTicker\"}},\"id\":\"1724\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1725\",\"type\":\"CategoricalTicker\"}},\"id\":\"1727\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1721\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1728\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1712\",\"type\":\"FactorRange\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1728\",\"type\":\"SaveTool\"},{\"id\":\"1729\",\"type\":\"HoverTool\"}]},\"id\":\"1730\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1718\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1793\",\"type\":\"Selection\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1791\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1721\",\"type\":\"CategoricalTicker\"}},\"id\":\"1720\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1729\",\"type\":\"HoverTool\"},{\"attributes\":{},\"id\":\"1716\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1708\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1735\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 
\\uc990\\uae34\\ub2e4.\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\uac1c\"]},\"id\":\"1714\",\"type\":\"FactorRange\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1736\",\"type\":\"Rect\"},{\"attributes\":{\"source\":{\"id\":\"1733\",\"type\":\"ColumnDataSource\"}},\"id\":\"1738\",\"type\":\"CDSView\"},{\"attributes\":{\"data_source\":{\"id\":\"1733\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1735\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1736\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1738\",\"type\":\"CDSView\"}},\"id\":\"1737\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"text\":\"English-Korean Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1710\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAgICf6z8AAAAAKEjlPwAAAKBNk+A/AAAAwFZl5D8AAAAAUpnnPwAAAID69uA/AAAAQJua4j8AAADgCQ3jPwAAAIC9gOg/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1793\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1794\",\"type\":\"UnionRenderers\"}},\"id\":\"1733\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"1789\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{},\"id\":\"1791\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1721\",\"type\":\"CategoricalTicker\"}},\"id\":\"1723\",\"type\":\"Grid\"},{\"attributes\":{\"high\":0.8632204532623291,\"low\":0.5179813504219055,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1708\",\"type\":\"LinearColorMapper\"}],\"root_ids\":[\"1709\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"1bbe715c-608d-49a6-8927-e818fa752480\",\"roots\":{\"1709\":\"7b449243-0dbd-46b6-8b02-a89fdf92645e\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + 
" embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1709" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, ko_result, english_sentences, korean_sentences, 'English-Korean Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dfTj-JaunFTv" + }, + "source": [ + "### Chinese-Korean Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MndSgKGPnJuF" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " 
// store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < 
inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, 
js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"3d4a0aff-b8c3-43fb-a5af-6dfe1b9d0e1f\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1849\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1852\",\"type\":\"Grid\"},{\"id\":\"1856\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1853\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1866\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1839\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1859\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1841\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1845\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1843\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1847\",\"type\":\"CategoricalScale\"}},\"id\":\"1838\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1925\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1927\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1850\",\"type\":\"CategoricalTicker\"}},\"id\":\"1849\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\"]},\"id\":\"1841\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1929\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1850\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1927\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1850\",\"type\":\"CategoricalTicker\"}},\"id\":\"1852\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1925\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1854\",\"type\":\"CategoricalTicker\"}},\"id\":\"1853\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1854\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1854\",\"type\":\"CategoricalTicker\"}},\"id\":\"1856\",\"type\":\"Grid\"},{\"attributes\":{\"text\":\"Chinese-Korean Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1839\",\"type\":\"Title\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1857\",\"type\":\"SaveTool\"},{\"id\":\"1858\",\"type\":\"HoverTool\"}]},\"id\":\"1859\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1857\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| 
@embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1858\",\"type\":\"HoverTool\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"\\u72d7\",\"\\u72d7\",\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\"],\"embeddings_2\":[\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAwIKP6z8AAACAHL7lPwAAAKDsSuA/AAAAoN0A5j8AAACgWsboPwAAAIANGeE/AAAAQMFJ4z8AAADA8D7jPwAAAABna+c/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1929\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1930\",\"type\":\"UnionRenderers\"}},\"id\":\"1862\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1865\",\"type\":\"Rect\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1837\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1864\",\"type\":\"Rect\"},{\"attributes\":{},\"id\":\"1847\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"data_source\":{\"id\":\"1862\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1864\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1865\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1867\",\"type\":\"CDSView\"}},\"id\":\"1866\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"1862\",\"type\":\"ColumnDataSource\"}},\"id\":\"1867\",\"type\":\"CDSView\"},{\"attributes\":{\"high\":0.8612684011459351,\"low\":0.5091460347175598,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1837\",\"type\":\"LinearColorMapper\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac15\\uc544\\uc9c0\\uac00 
\\uc88b\\ub2e4.\",\"\\uac1c\"]},\"id\":\"1843\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1845\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1930\",\"type\":\"UnionRenderers\"}],\"root_ids\":[\"1838\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"3d4a0aff-b8c3-43fb-a5af-6dfe1b9d0e1f\",\"roots\":{\"1838\":\"63952aa4-d54a-4445-ad10-ef5bef98f1ef\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1838" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(zh_result, ko_result, chinese_sentences, korean_sentences, 'Chinese-Korean Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rRabHHQYQfLr" + }, + "source": [ + "### And more...\n", + "\n", + "The above examples can be extended to any language pair from **English, Arabic, Chinese, Dutch, French, German, Italian, Japanese, Korean, Polish, Portuguese, Russian, Spanish, Thai and Turkish**. Happy coding!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mxAFAJI9xsAU" + }, + "source": [ + "# Creating a Multilingual Semantic-Similarity Search Engine\n", + "\n", + "Whereas in the previous example we visualized a handful of sentences, in this section we will build a semantic-search index of about 200,000 sentences from a Wikipedia Corpus. About half will be in English and the other half in Spanish to demonstrate the multilingual capabilities of the Universal Sentence Encoder.\n", + "\n", + "## Download Data to Index\n", + "First, we will download news sentences in multiples languages from the [News Commentary Corpus](http://opus.nlpl.eu/News-Commentary-v11.php) [1]. Without loss of generality, this approach should also work for indexing the rest of the supported languages.\n", + "\n", + "To speed up the demo, we limit to 1000 sentences per language." 
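As a hedged sketch of extending the comparison to one more pair (English-Japanese), reusing the helpers shown in this notebook: the Japanese sentence list below and the `embed_text` name are illustrative assumptions (use whatever embedding helper and sentences you defined earlier), while `visualize_similarity`, `en_result` and `english_sentences` are the same objects used above.

```python
import numpy as np

# Illustrative only: rough Japanese counterparts of the English examples, and
# `embed_text` stands in for the notebook's multilingual-encoder helper.
japanese_sentences = ['犬', '子犬はいいです。', '私は犬と一緒にビーチを散歩するのが好きです。']
ja_result = embed_text(japanese_sentences)

# The heatmaps above are driven by the pairwise inner products of the
# sentence embeddings.
print(np.round(np.inner(en_result, ja_result), 2))

visualize_similarity(en_result, ja_result, english_sentences, japanese_sentences,
                     'English-Japanese Similarity')
```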
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "587I9ye6yXEU" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/ar-en.txt.zip\n", + "24715264/24714354 [==============================] - 2s 0us/step\n", + "1,000 Arabic sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-zh.txt.zip\n", + "18104320/18101984 [==============================] - 2s 0us/step\n", + "1,000 Chinese sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-es.txt.zip\n", + "28106752/28106064 [==============================] - 2s 0us/step\n", + "1,000 English sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-ru.txt.zip\n", + "24854528/24849511 [==============================] - 2s 0us/step\n", + "1,000 Russian sentences\n", + "1,000 Spanish sentences\n" + ] + } + ], + "source": [ + "corpus_metadata = [\n", + " ('ar', 'ar-en.txt.zip', 'News-Commentary.ar-en.ar', 'Arabic'),\n", + " ('zh', 'en-zh.txt.zip', 'News-Commentary.en-zh.zh', 'Chinese'),\n", + " ('en', 'en-es.txt.zip', 'News-Commentary.en-es.en', 'English'),\n", + " ('ru', 'en-ru.txt.zip', 'News-Commentary.en-ru.ru', 'Russian'),\n", + " ('es', 'en-es.txt.zip', 'News-Commentary.en-es.es', 'Spanish'),\n", + "]\n", + "\n", + "language_to_sentences = {}\n", + "language_to_news_path = {}\n", + "for language_code, zip_file, news_file, language_name in corpus_metadata:\n", + " zip_path = tf.keras.utils.get_file(\n", + " fname=zip_file,\n", + " origin='http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/' + zip_file,\n", + " extract=True)\n", + " news_path = os.path.join(os.path.dirname(zip_path), news_file)\n", + " language_to_sentences[language_code] = pd.read_csv(news_path, sep='\\t', header=None)[0][:1000]\n", + " language_to_news_path[language_code] = news_path\n", + "\n", + " print('{:,} {} sentences'.format(len(language_to_sentences[language_code]), language_name))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m3DIT9uT7Z34" + }, + "source": [ + "## Using a pre-trained model to transform sentences into vectors\n", + "\n", + "We compute embeddings in _batches_ so that they fit in the GPU's RAM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yRoRT5qCEIYy" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r 0%| | 0/1000 [00:00\n", + "
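For readers who want to see the batching step spelled out, here is a minimal sketch under stated assumptions: `embed_text` is a stand-in for the notebook's embedding helper, the batch size is an arbitrary value chosen to fit in GPU memory, and the brute-force search at the end is only an illustration (the tutorial itself builds a proper nearest-neighbor index over the embeddings).

```python
import numpy as np

batch_size = 256  # assumption: pick whatever fits in your GPU's RAM

language_to_embeddings = {}
for language_code, sentences in language_to_sentences.items():
  sentences = list(sentences)
  # Embed in batches and stack the results into one (num_sentences, dim) array.
  batches = [
      np.asarray(embed_text(sentences[i:i + batch_size]))
      for i in range(0, len(sentences), batch_size)
  ]
  language_to_embeddings[language_code] = np.vstack(batches)
  print(language_code, language_to_embeddings[language_code].shape)

# Brute-force cross-lingual search: Spanish sentences closest to an English query.
query = np.asarray(embed_text(['global economic growth']))[0]
scores = language_to_embeddings['es'] @ query
for i in np.argsort(-scores)[:3]:
  print(round(float(scores[i]), 3), language_to_sentences['es'][i])
```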
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U5POcTVNB_dv" + }, + "source": [ + "# HRNet based model for semantic segmentation\n", + "\n", + "In this notebook, you will:\n", + "\n", + "- Choose and load one of the 17 pre-trained HRNet models on different semantic segmentation datasets\n", + "- Run inference to extract features from the model backbone and predictions from the model head" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_XgTpm9ZxoN9" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "from PIL import Image\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UVtEyxDFpKE1" + }, + "source": [ + "## Loading models from TensorFlow Hub\n", + "\n", + "Here you can choose the pre-trained HRNet model to load, different models means a different training dataset used. All models have the same architecture, except for the model head, which has a different dimension based on the number of classes contained in the training dataset (dataset_output_classes). For more information about the different datasets we refer to the links above and the [factors of influence dataset collection](https://github.com/google-research/google-research/tree/master/factors_of_influence)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8_ctG55-uTX" + }, + "outputs": [], + "source": [ + "#@title Choose a pre-trained HRNet model to load.\n", + "\n", + "hrnet_model_name = 'ade20k-hrnetv2-w48/1' #@param [\"ade20k-hrnetv2-w48/1\", \"isprs-hrnetv2-w48/1\", \"vkitti2-hrnetv2-w48/1\", \"vgallery-hrnetv2-w48/1\", \"sunrgbd-hrnetv2-w48/1\", \"suim-hrnetv2-w48/1\", \"scannet-hrnetv2-w48/1\", \"pvoc-hrnetv2-w48/1\", \"msegpcontext-hrnetv2-w48/1\", \"mapillary-hrnetv2-w48/1\", \"kitti-hrnetv2-w48/1\", \"isaid-hrnetv2-w48/1\", \"idd-hrnetv2-w48/1\", \"coco-hrnetv2-w48/1\", \"city-hrnetv2-w48/1\", \"camvid-hrnetv2-w48/1\", \"bdd-hrnetv2-w48/1\"]\n", + "\n", + "tfhub_model_name = 'https://tfhub.dev/google/HRNet/' + hrnet_model_name\n", + "\n", + "print('HRNet model selected :', tfhub_model_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T-yHJ5X55kWN" + }, + "outputs": [], + "source": [ + "hrnet_model = hub.load(tfhub_model_name)\n", + "\n", + "print('HRNet model loaded :', tfhub_model_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pMP_7v9x6kol" + }, + "source": [ + "## Loading an image and running inference\n", + "\n", + "This is a demonstration on how to run inference for extracting features and predictions from an image. The image was taken from the scene150 dataset.\n", + "\n", + "To perform inference on the datasets that were used during training we refer to the [factors of influence dataset collection](https://github.com/google-research/google-research/tree/master/factors_of_influence)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GNzjieS66td_" + }, + "outputs": [], + "source": [ + "img_file = tf.keras.utils.get_file(origin=\"https://tensorflow.org/images/bedroom_hrnet_tutorial.jpg\")\n", + "img = np.array(Image.open(img_file))/255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lp54vD_FZuHw" + }, + "outputs": [], + "source": [ + "plt.imshow(img)\n", + "plt.show()\n", + "\n", + "# Predictions will have shape (batch_size, h, w, dataset_output_classes)\n", + "predictions = hrnet_model.predict([img])\n", + "plt.imshow(predictions[0,:,:,1])\n", + "plt.title('Predictions for class #1')\n", + "plt.show() \n", + "# Features will have shape (batch_size, h/4, w/4, 720)\n", + "features = hrnet_model.get_features([img])\n", + "plt.imshow(features[0,:,:,1])\n", + "plt.title('Feature #1 out of 720')\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "hrnet_semantic_segmentation.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_classification.ipynb b/site/en/hub/tutorials/image_classification.ipynb new file mode 100644 index 00000000000..91aadab727e --- /dev/null +++ b/site/en/hub/tutorials/image_classification.ipynb @@ -0,0 +1,463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7t7KGfIwHaXz" + }, + "source": [ + "# Image Classification with TensorFlow Hub\n", + "\n", + "In this colab, you'll try multiple image classification models from TensorFlow Hub and decide which one is best for your use case.\n", + "\n", + "Because TF Hub encourages a [consistent input convention](https://www.tensorflow.org/hub/common_saved_model_apis/images#image_input) for models that operate on images, it's easy to experiment with different architectures to find the one that best fits your needs." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N8H5ufxkc2mk" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import requests\n", + "from PIL import Image\n", + "from io import BytesIO\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "oKvj6lY6kZx8" + }, + "outputs": [], + "source": [ + "#@title Helper functions for loading image (hidden)\n", + "\n", + "original_image_cache = {}\n", + "\n", + "def preprocess_image(image):\n", + " image = np.array(image)\n", + " # reshape into shape [batch_size, height, width, num_channels]\n", + " img_reshaped = tf.reshape(image, [1, image.shape[0], image.shape[1], image.shape[2]])\n", + " # Use `convert_image_dtype` to convert to floats in the [0,1] range.\n", + " image = tf.image.convert_image_dtype(img_reshaped, tf.float32)\n", + " return image\n", + "\n", + "def load_image_from_url(img_url):\n", + " \"\"\"Returns an image with shape [1, height, width, num_channels].\"\"\"\n", + " user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}\n", + " response = requests.get(img_url, headers=user_agent)\n", + " image = Image.open(BytesIO(response.content))\n", + " image = preprocess_image(image)\n", + " return image\n", + "\n", + "def load_image(image_url, image_size=256, dynamic_size=False, max_dynamic_size=512):\n", + " \"\"\"Loads and preprocesses images.\"\"\"\n", + " # Cache image file locally.\n", + " if image_url in original_image_cache:\n", + " img = original_image_cache[image_url]\n", + " elif image_url.startswith('https://'):\n", + " img = load_image_from_url(image_url)\n", + " else:\n", + " fd = tf.io.gfile.GFile(image_url, 'rb')\n", + " img = preprocess_image(Image.open(fd))\n", + " original_image_cache[image_url] = img\n", + " # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].\n", + " img_raw = img\n", + " if tf.reduce_max(img) > 1.0:\n", + " img = img / 255.\n", + " if len(img.shape) == 3:\n", + " img = tf.stack([img, img, img], axis=-1)\n", + " if not dynamic_size:\n", + " img = tf.image.resize_with_pad(img, image_size, image_size)\n", + " elif img.shape[1] > max_dynamic_size or img.shape[2] > max_dynamic_size:\n", + " img = tf.image.resize_with_pad(img, max_dynamic_size, max_dynamic_size)\n", + " return img, img_raw\n", + "\n", + "def show_image(image, title=''):\n", + " image_size = image.shape[1]\n", + " w = (image_size * 6) // 320\n", + " plt.figure(figsize=(w, w))\n", + " plt.imshow(image[0], aspect='equal')\n", + " plt.axis('off')\n", + " plt.title(title)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ws1AMDT_CDPq" + }, + "source": [ + "Select an Image Classification Model. After that, some internal variables are set and the labels file is downloaded and prepared for use.\n", + "\n", + "There are some technical differences between the models, like different input size, model size, accuracy, and inference time. Here you can change the model you are using until you find the one most suitable for your use case.\n", + "\n", + "The handle (url) of the model is printed for your convenience. 
More documentation about each model is available there.\n", + "\n", + "Note: All these models were trained on the ImageNet dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iQ3aamrBfs-c" + }, + "outputs": [], + "source": [ + "#@title Select an Image Classification model\n", + "\n", + "image_size = 224\n", + "dynamic_size = False\n", + "\n", + "model_name = \"efficientnetv2-s\" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']\n", + "\n", + "model_handle_map = {\n", + " \"efficientnetv2-s\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2\",\n", + " \"efficientnetv2-m\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/classification/2\",\n", + " \"efficientnetv2-l\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/classification/2\",\n", + " \"efficientnetv2-s-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/classification/2\",\n", + " \"efficientnetv2-m-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/classification/2\",\n", + " \"efficientnetv2-l-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/classification/2\",\n", + " \"efficientnetv2-xl-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/classification/2\",\n", + " \"efficientnetv2-b0-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/classification/2\",\n", + " \"efficientnetv2-b1-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/classification/2\",\n", + " \"efficientnetv2-b2-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/classification/2\",\n", + " \"efficientnetv2-b3-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/classification/2\",\n", + " \"efficientnetv2-s-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/classification/2\",\n", + " \"efficientnetv2-m-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/classification/2\",\n", + " \"efficientnetv2-l-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/classification/2\",\n", + " \"efficientnetv2-xl-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/classification/2\",\n", + " \"efficientnetv2-b0-21k-ft1k\": 
\"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/classification/2\",\n", + " \"efficientnetv2-b1-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/classification/2\",\n", + " \"efficientnetv2-b2-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/classification/2\",\n", + " \"efficientnetv2-b3-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/classification/2\",\n", + " \"efficientnetv2-b0\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/classification/2\",\n", + " \"efficientnetv2-b1\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/classification/2\",\n", + " \"efficientnetv2-b2\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/classification/2\",\n", + " \"efficientnetv2-b3\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/classification/2\",\n", + " \"efficientnet_b0\": \"https://tfhub.dev/tensorflow/efficientnet/b0/classification/1\",\n", + " \"efficientnet_b1\": \"https://tfhub.dev/tensorflow/efficientnet/b1/classification/1\",\n", + " \"efficientnet_b2\": \"https://tfhub.dev/tensorflow/efficientnet/b2/classification/1\",\n", + " \"efficientnet_b3\": \"https://tfhub.dev/tensorflow/efficientnet/b3/classification/1\",\n", + " \"efficientnet_b4\": \"https://tfhub.dev/tensorflow/efficientnet/b4/classification/1\",\n", + " \"efficientnet_b5\": \"https://tfhub.dev/tensorflow/efficientnet/b5/classification/1\",\n", + " \"efficientnet_b6\": \"https://tfhub.dev/tensorflow/efficientnet/b6/classification/1\",\n", + " \"efficientnet_b7\": \"https://tfhub.dev/tensorflow/efficientnet/b7/classification/1\",\n", + " \"bit_s-r50x1\": \"https://tfhub.dev/google/bit/s-r50x1/ilsvrc2012_classification/1\",\n", + " \"inception_v3\": \"https://tfhub.dev/google/imagenet/inception_v3/classification/4\",\n", + " \"inception_resnet_v2\": \"https://tfhub.dev/google/imagenet/inception_resnet_v2/classification/4\",\n", + " \"resnet_v1_50\": \"https://tfhub.dev/google/imagenet/resnet_v1_50/classification/4\",\n", + " \"resnet_v1_101\": \"https://tfhub.dev/google/imagenet/resnet_v1_101/classification/4\",\n", + " \"resnet_v1_152\": \"https://tfhub.dev/google/imagenet/resnet_v1_152/classification/4\",\n", + " \"resnet_v2_50\": \"https://tfhub.dev/google/imagenet/resnet_v2_50/classification/4\",\n", + " \"resnet_v2_101\": \"https://tfhub.dev/google/imagenet/resnet_v2_101/classification/4\",\n", + " \"resnet_v2_152\": \"https://tfhub.dev/google/imagenet/resnet_v2_152/classification/4\",\n", + " \"nasnet_large\": \"https://tfhub.dev/google/imagenet/nasnet_large/classification/4\",\n", + " \"nasnet_mobile\": \"https://tfhub.dev/google/imagenet/nasnet_mobile/classification/4\",\n", + " \"pnasnet_large\": \"https://tfhub.dev/google/imagenet/pnasnet_large/classification/4\",\n", + " \"mobilenet_v2_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4\",\n", + " \"mobilenet_v2_130_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/4\",\n", + " \"mobilenet_v2_140_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/4\",\n", + " \"mobilenet_v3_small_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/classification/5\",\n", + " \"mobilenet_v3_small_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/classification/5\",\n", + " \"mobilenet_v3_large_100_224\": 
\"https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/classification/5\",\n", + " \"mobilenet_v3_large_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/classification/5\",\n", + "}\n", + "\n", + "model_image_size_map = {\n", + " \"efficientnetv2-s\": 384,\n", + " \"efficientnetv2-m\": 480,\n", + " \"efficientnetv2-l\": 480,\n", + " \"efficientnetv2-b0\": 224,\n", + " \"efficientnetv2-b1\": 240,\n", + " \"efficientnetv2-b2\": 260,\n", + " \"efficientnetv2-b3\": 300,\n", + " \"efficientnetv2-s-21k\": 384,\n", + " \"efficientnetv2-m-21k\": 480,\n", + " \"efficientnetv2-l-21k\": 480,\n", + " \"efficientnetv2-xl-21k\": 512,\n", + " \"efficientnetv2-b0-21k\": 224,\n", + " \"efficientnetv2-b1-21k\": 240,\n", + " \"efficientnetv2-b2-21k\": 260,\n", + " \"efficientnetv2-b3-21k\": 300,\n", + " \"efficientnetv2-s-21k-ft1k\": 384,\n", + " \"efficientnetv2-m-21k-ft1k\": 480,\n", + " \"efficientnetv2-l-21k-ft1k\": 480,\n", + " \"efficientnetv2-xl-21k-ft1k\": 512,\n", + " \"efficientnetv2-b0-21k-ft1k\": 224,\n", + " \"efficientnetv2-b1-21k-ft1k\": 240,\n", + " \"efficientnetv2-b2-21k-ft1k\": 260,\n", + " \"efficientnetv2-b3-21k-ft1k\": 300, \n", + " \"efficientnet_b0\": 224,\n", + " \"efficientnet_b1\": 240,\n", + " \"efficientnet_b2\": 260,\n", + " \"efficientnet_b3\": 300,\n", + " \"efficientnet_b4\": 380,\n", + " \"efficientnet_b5\": 456,\n", + " \"efficientnet_b6\": 528,\n", + " \"efficientnet_b7\": 600,\n", + " \"inception_v3\": 299,\n", + " \"inception_resnet_v2\": 299,\n", + " \"mobilenet_v2_100_224\": 224,\n", + " \"mobilenet_v2_130_224\": 224,\n", + " \"mobilenet_v2_140_224\": 224,\n", + " \"nasnet_large\": 331,\n", + " \"nasnet_mobile\": 224,\n", + " \"pnasnet_large\": 331,\n", + " \"resnet_v1_50\": 224,\n", + " \"resnet_v1_101\": 224,\n", + " \"resnet_v1_152\": 224,\n", + " \"resnet_v2_50\": 224,\n", + " \"resnet_v2_101\": 224,\n", + " \"resnet_v2_152\": 224,\n", + " \"mobilenet_v3_small_100_224\": 224,\n", + " \"mobilenet_v3_small_075_224\": 224,\n", + " \"mobilenet_v3_large_100_224\": 224,\n", + " \"mobilenet_v3_large_075_224\": 224,\n", + "}\n", + "\n", + "model_handle = model_handle_map[model_name]\n", + "\n", + "print(f\"Selected model: {model_name} : {model_handle}\")\n", + "\n", + "\n", + "max_dynamic_size = 512\n", + "if model_name in model_image_size_map:\n", + " image_size = model_image_size_map[model_name]\n", + " dynamic_size = False\n", + " print(f\"Images will be converted to {image_size}x{image_size}\")\n", + "else:\n", + " dynamic_size = True\n", + " print(f\"Images will be capped to a max size of {max_dynamic_size}x{max_dynamic_size}\")\n", + "\n", + "labels_file = \"https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt\"\n", + "\n", + "#download labels and creates a maps\n", + "downloaded_file = tf.keras.utils.get_file(\"labels.txt\", origin=labels_file)\n", + "\n", + "classes = []\n", + "\n", + "with open(downloaded_file) as f:\n", + " labels = f.readlines()\n", + " classes = [l.strip() for l in labels]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vxcASidjBAE8" + }, + "source": [ + "You can select one of the images below, or use your own image. Just remember that the input size for the models vary and some of them use a dynamic input size (enabling inference on the unscaled image). Given that, the method `load_image` will already rescale the image to the expected format." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "o2rMsr4CgET2" + }, + "outputs": [], + "source": [ + "#@title Select an Input Image\n", + "\n", + "image_name = \"turtle\" # @param ['tiger', 'bus', 'car', 'cat', 'dog', 'apple', 'banana', 'turtle', 'flamingo', 'piano', 'honeycomb', 'teapot']\n", + "\n", + "images_for_test_map = {\n", + " \"tiger\": \"https://upload.wikimedia.org/wikipedia/commons/b/b0/Bengal_tiger_%28Panthera_tigris_tigris%29_female_3_crop.jpg\",\n", + " #by Charles James Sharp, CC BY-SA 4.0 , via Wikimedia Commons\n", + " \"bus\": \"https://upload.wikimedia.org/wikipedia/commons/6/63/LT_471_%28LTZ_1471%29_Arriva_London_New_Routemaster_%2819522859218%29.jpg\",\n", + " #by Martin49 from London, England, CC BY 2.0 , via Wikimedia Commons\n", + " \"car\": \"https://upload.wikimedia.org/wikipedia/commons/4/49/2013-2016_Toyota_Corolla_%28ZRE172R%29_SX_sedan_%282018-09-17%29_01.jpg\",\n", + " #by EurovisionNim, CC BY-SA 4.0 , via Wikimedia Commons\n", + " \"cat\": \"https://upload.wikimedia.org/wikipedia/commons/4/4d/Cat_November_2010-1a.jpg\",\n", + " #by Alvesgaspar, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"dog\": \"https://upload.wikimedia.org/wikipedia/commons/archive/a/a9/20090914031557%21Saluki_dog_breed.jpg\",\n", + " #by Craig Pemberton, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"apple\": \"https://upload.wikimedia.org/wikipedia/commons/1/15/Red_Apple.jpg\",\n", + " #by Abhijit Tembhekar from Mumbai, India, CC BY 2.0 , via Wikimedia Commons\n", + " \"banana\": \"https://upload.wikimedia.org/wikipedia/commons/1/1c/Bananas_white_background.jpg\",\n", + " #by fir0002 flagstaffotos [at] gmail.com\t\tCanon 20D + Tamron 28-75mm f/2.8, GFDL 1.2 , via Wikimedia Commons\n", + " \"turtle\": \"https://upload.wikimedia.org/wikipedia/commons/8/80/Turtle_golfina_escobilla_oaxaca_mexico_claudio_giovenzana_2010.jpg\",\n", + " #by Claudio Giovenzana, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"flamingo\": \"https://upload.wikimedia.org/wikipedia/commons/b/b8/James_Flamingos_MC.jpg\",\n", + " #by Christian Mehlführer, User:Chmehl, CC BY 3.0 , via Wikimedia Commons\n", + " \"piano\": \"https://upload.wikimedia.org/wikipedia/commons/d/da/Steinway_%26_Sons_upright_piano%2C_model_K-132%2C_manufactured_at_Steinway%27s_factory_in_Hamburg%2C_Germany.png\",\n", + " #by \"Photo: © Copyright Steinway & Sons\", CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"honeycomb\": \"https://upload.wikimedia.org/wikipedia/commons/f/f7/Honey_comb.jpg\",\n", + " #by Merdal, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"teapot\": \"https://upload.wikimedia.org/wikipedia/commons/4/44/Black_tea_pot_cropped.jpg\",\n", + " #by Mendhak, CC BY-SA 2.0 , via Wikimedia Commons\n", + "}\n", + "\n", + "img_url = images_for_test_map[image_name]\n", + "image, original_image = load_image(img_url, image_size, dynamic_size, max_dynamic_size)\n", + "show_image(image, 'Scaled image')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CMwWx8_8Aw3X" + }, + "source": [ + "Now that the model was chosen, loading it with TensorFlow Hub is simple.\n", + "\n", + "This also calls the model with a random input as a \"warmup\" run. Subsequent calls are often much faster, and you can compare this with the latency below.\n", + "\n", + "*Note:* models that use a dynamic size might need a fresh \"warmup\" run for each image size." 
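If you do feed several different image sizes to a dynamic-size model, one way to act on that note is to warm the model up once per distinct input shape, as in this sketch (it assumes `classifier` has already been loaded with `hub.load(model_handle)`, as in the next cell):

```python
# Sketch: warm up a dynamic-size model once per distinct input shape.
# Assumes `classifier = hub.load(model_handle)` has run, as in the next cell.
warmed_up_shapes = set()

def predict_with_warmup(img):
  shape = tuple(img.shape)
  if shape not in warmed_up_shapes:
    # First call at this shape: throwaway inference so later calls are faster.
    classifier(tf.random.uniform(shape, 0, 1.0))
    warmed_up_shapes.add(shape)
  return classifier(img)
```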
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LRAccT3UhRga" + }, + "outputs": [], + "source": [ + "classifier = hub.load(model_handle)\n", + "\n", + "input_shape = image.shape\n", + "warmup_input = tf.random.uniform(input_shape, 0, 1.0)\n", + "%time warmup_logits = classifier(warmup_input).numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e7vkdUqpBkfE" + }, + "source": [ + "Everything is ready for inference. Here you can see the top 5 results from the model for the selected image." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I0QNHg3bk-G1" + }, + "outputs": [], + "source": [ + "# Run model on image\n", + "%time probabilities = tf.nn.softmax(classifier(image)).numpy()\n", + "\n", + "top_5 = tf.argsort(probabilities, axis=-1, direction=\"DESCENDING\")[0][:5].numpy()\n", + "np_classes = np.array(classes)\n", + "\n", + "# Some models include an additional 'background' class in the predictions, so\n", + "# we must account for this when reading the class labels.\n", + "includes_background_class = probabilities.shape[1] == 1001\n", + "\n", + "for i, item in enumerate(top_5):\n", + " class_index = item if includes_background_class else item + 1\n", + " line = f'({i+1}) {class_index:4} - {classes[class_index]}: {probabilities[0][top_5][i]}'\n", + " print(line)\n", + "\n", + "show_image(image, '')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4IJrq5eZDWl" + }, + "source": [ + "## Learn More\n", + "\n", + "If you want to learn more and try how to do Transfer Learning with these models you can try this tutorial: [Transfer Learning for Image classification](https://www.tensorflow.org/hub/tutorials/tf2_image_retraining) \n", + "\n", + "If you want to check on more image models you can check them out on [tfhub.dev](https://tfhub.dev/s?module-type=image-augmentation,image-classification,image-classification-logits,image-classifier,image-feature-vector,image-generator,image-object-detection,image-others,image-pose-detection,image-segmentation,image-style-transfer,image-super-resolution,image-rnn-agent)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "image_classification.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_enhancing.ipynb b/site/en/hub/tutorials/image_enhancing.ipynb new file mode 100644 index 00000000000..3710ebd6d66 --- /dev/null +++ b/site/en/hub/tutorials/image_enhancing.ipynb @@ -0,0 +1,455 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GeerbrLA0uju" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "\n", + "Created by @[Adrish Dey](https://github.com/captain-pool) for [Google Summer of Code](https://summerofcode.withgoogle.com/) 2019" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yntM0JbY0uj5" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS, \n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UJeo2a5C0uj2" + }, + "source": [ + "# Image Super Resolution using ESRGAN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ps4toA1d_tkc" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkW9jAmt_zjB" + }, + "source": [ + "This colab demonstrates use of TensorFlow Hub Module for Enhanced Super Resolution Generative Adversarial Network (*by Xintao Wang et.al.*) [[Paper](https://arxiv.org/pdf/1809.00219.pdf)] [[Code](https://github.com/captain-pool/GSOC/)]\n", + "\n", + "for image enhancing. *(Preferrably bicubically downsampled images).*\n", + "\n", + "Model trained on DIV2K Dataset (on bicubically downsampled images) on image patches of size 128 x 128." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LBGty4O_0ukJ" + }, + "source": [ + "**Preparing Environment**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lnyLTyUt0ukN" + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "from PIL import Image\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "os.environ[\"TFHUB_DOWNLOAD_PROGRESS\"] = \"True\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dremsFdh0ukX" + }, + "outputs": [], + "source": [ + "!wget \"https://user-images.githubusercontent.com/12981474/40157448-eff91f06-5953-11e8-9a37-f6b5693fa03f.png\" -O original.png" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DXot2kru0ukh" + }, + "outputs": [], + "source": [ + "# Declaring Constants\n", + "IMAGE_PATH = \"original.png\"\n", + "SAVED_MODEL_PATH = \"https://tfhub.dev/captain-pool/esrgan-tf2/1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KF_tHde-p3rn" + }, + "source": [ + "**Defining Helper Functions**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IslbQmTj0ukz" + }, + "outputs": [], + "source": [ + "def preprocess_image(image_path):\n", + " \"\"\" Loads image from path and preprocesses to make it model ready\n", + " Args:\n", + " image_path: Path to the image file\n", + " \"\"\"\n", + " hr_image = tf.image.decode_image(tf.io.read_file(image_path))\n", + " # If PNG, remove the alpha channel. The model only supports\n", + " # images with 3 color channels.\n", + " if hr_image.shape[-1] == 4:\n", + " hr_image = hr_image[...,:-1]\n", + " hr_size = (tf.convert_to_tensor(hr_image.shape[:-1]) // 4) * 4\n", + " hr_image = tf.image.crop_to_bounding_box(hr_image, 0, 0, hr_size[0], hr_size[1])\n", + " hr_image = tf.cast(hr_image, tf.float32)\n", + " return tf.expand_dims(hr_image, 0)\n", + "\n", + "def save_image(image, filename):\n", + " \"\"\"\n", + " Saves unscaled Tensor Images.\n", + " Args:\n", + " image: 3D image tensor. [height, width, channels]\n", + " filename: Name of the file to save.\n", + " \"\"\"\n", + " if not isinstance(image, Image.Image):\n", + " image = tf.clip_by_value(image, 0, 255)\n", + " image = Image.fromarray(tf.cast(image, tf.uint8).numpy())\n", + " image.save(\"%s.jpg\" % filename)\n", + " print(\"Saved as %s.jpg\" % filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uh1E2rBpnWxV" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "def plot_image(image, title=\"\"):\n", + " \"\"\"\n", + " Plots images from image tensors.\n", + " Args:\n", + " image: 3D image tensor. 
[height, width, channels].\n", + " title: Title to display in the plot.\n", + " \"\"\"\n", + " image = np.asarray(image)\n", + " image = tf.clip_by_value(image, 0, 255)\n", + " image = Image.fromarray(tf.cast(image, tf.uint8).numpy())\n", + " plt.imshow(image)\n", + " plt.axis(\"off\")\n", + " plt.title(title)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ycrCTvmlqBMD" + }, + "source": [ + "#### Performing Super Resolution of images loaded from path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L7XpMk8Y0uk7" + }, + "outputs": [], + "source": [ + "hr_image = preprocess_image(IMAGE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hWgCbUa_0ulG" + }, + "outputs": [], + "source": [ + "# Plotting Original Resolution image\n", + "plot_image(tf.squeeze(hr_image), title=\"Original Image\")\n", + "save_image(tf.squeeze(hr_image), filename=\"Original Image\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ouwEyKLA0ulO" + }, + "outputs": [], + "source": [ + "model = hub.load(SAVED_MODEL_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dz79ncnT0ulX" + }, + "outputs": [], + "source": [ + "start = time.time()\n", + "fake_image = model(hr_image)\n", + "fake_image = tf.squeeze(fake_image)\n", + "print(\"Time Taken: %f\" % (time.time() - start))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ABjkkJHC2jNL" + }, + "outputs": [], + "source": [ + "# Plotting Super Resolution Image\n", + "plot_image(tf.squeeze(fake_image), title=\"Super Resolution\")\n", + "save_image(tf.squeeze(fake_image), filename=\"Super Resolution\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tuKu18UYptkx" + }, + "source": [ + "### Evaluating Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qdz55sxMgiwO" + }, + "outputs": [], + "source": [ + "!wget \"https://lh4.googleusercontent.com/-Anmw5df4gj0/AAAAAAAAAAI/AAAAAAAAAAc/6HxU8XFLnQE/photo.jpg64\" -O test.jpg\n", + "IMAGE_PATH = \"test.jpg\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F6tMNtqy0ukq" + }, + "outputs": [], + "source": [ + "# Defining helper functions\n", + "def downscale_image(image):\n", + " \"\"\"\n", + " Scales down images using bicubic downsampling.\n", + " Args:\n", + " image: 3D or 4D tensor of preprocessed image\n", + " \"\"\"\n", + " image_size = []\n", + " if len(image.shape) == 3:\n", + " image_size = [image.shape[1], image.shape[0]]\n", + " else:\n", + " raise ValueError(\"Dimension mismatch. 
Can work only on single image.\")\n", + "\n", + " image = tf.squeeze(\n", + " tf.cast(\n", + " tf.clip_by_value(image, 0, 255), tf.uint8))\n", + "\n", + " lr_image = np.asarray(\n", + " Image.fromarray(image.numpy())\n", + " .resize([image_size[0] // 4, image_size[1] // 4],\n", + " Image.BICUBIC))\n", + "\n", + " lr_image = tf.expand_dims(lr_image, 0)\n", + " lr_image = tf.cast(lr_image, tf.float32)\n", + " return lr_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r2ANR1XDy77I" + }, + "outputs": [], + "source": [ + "hr_image = preprocess_image(IMAGE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r_defaultO6qbTV" + }, + "outputs": [], + "source": [ + "lr_image = downscale_image(tf.squeeze(hr_image))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jRw1x6xY0ulj" + }, + "outputs": [], + "source": [ + "# Plotting Low Resolution Image\n", + "plot_image(tf.squeeze(lr_image), title=\"Low Resolution\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g--yyHg7qXCw" + }, + "outputs": [], + "source": [ + "model = hub.load(SAVED_MODEL_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZX-deZlhqaYz" + }, + "outputs": [], + "source": [ + "start = time.time()\n", + "fake_image = model(lr_image)\n", + "fake_image = tf.squeeze(fake_image)\n", + "print(\"Time Taken: %f\" % (time.time() - start))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AmSga6MSq1PB" + }, + "outputs": [], + "source": [ + "plot_image(tf.squeeze(fake_image), title=\"Super Resolution\")\n", + "# Calculating PSNR wrt Original Image\n", + "psnr = tf.image.psnr(\n", + " tf.clip_by_value(fake_image, 0, 255),\n", + " tf.clip_by_value(hr_image, 0, 255), max_val=255)\n", + "print(\"PSNR Achieved: %f\" % psnr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YTBKCXPq9UZ" + }, + "source": [ + "**Comparing Outputs size by side.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ubdupldDypCy" + }, + "outputs": [], + "source": [ + "plt.rcParams['figure.figsize'] = [15, 10]\n", + "fig, axes = plt.subplots(1, 3)\n", + "fig.tight_layout()\n", + "plt.subplot(131)\n", + "plot_image(tf.squeeze(hr_image), title=\"Original\")\n", + "plt.subplot(132)\n", + "fig.tight_layout()\n", + "plot_image(tf.squeeze(lr_image), \"x4 Bicubic\")\n", + "plt.subplot(133)\n", + "fig.tight_layout()\n", + "plot_image(tf.squeeze(fake_image), \"Super Resolution\")\n", + "plt.savefig(\"ESRGAN_DIV2K.jpg\", bbox_inches=\"tight\")\n", + "print(\"PSNR: %f\" % psnr)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "image_enhancing.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_feature_vector.ipynb b/site/en/hub/tutorials/image_feature_vector.ipynb new file mode 100644 index 00000000000..b5283c45b3d --- /dev/null +++ b/site/en/hub/tutorials/image_feature_vector.ipynb @@ -0,0 +1,533 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"id": "bNnChGfZK2_w" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9Z_ZvMk5JPFV" + }, + "source": [ + "# Classify Flowers with Transfer Learning\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gh-LWtlqLtgH" + }, + "source": [ + "Have you ever seen a beautiful flower and wondered what kind of flower it is? Well, you're not the first, so let's build a way to identify the type of flower from a photo!\n", + "\n", + "For classifying images, a particular type of *deep neural network*, called a *convolutional neural network* has proved to be particularly powerful. However, modern convolutional neural networks have millions of parameters. Training them from scratch requires a lot of labeled training data and a lot of computing power (hundreds of GPU-hours or more). We only have about three thousand labeled photos and want to spend much less time, so we need to be more clever.\n", + "\n", + "We will use a technique called *transfer learning* where we take a pre-trained network (trained on about a million general images), use it to extract features, and train a new layer on top for our own task of classifying images of flowers.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NTrs9zBKJK1c" + }, + "outputs": [], + "source": [ + "import collections\n", + "import io\n", + "import math\n", + "import os\n", + "import random\n", + "from six.moves import urllib\n", + "\n", + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import sklearn.metrics as sk_metrics\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-T63G7NCSB" + }, + "source": [ + "## The flowers dataset\n", + "\n", + "The flowers dataset consists of images of flowers with 5 possible class labels.\n", + "\n", + "When training a machine learning model, we split our data into training and test datasets. We will train the model on our training data and then evaluate how well the model performs on data it has never seen - the test set.\n", + "\n", + "Let's download our training and test examples (it may take a while) and split them into train and test sets.\n", + "\n", + "Run the following two cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "HYQr1SILIxSK" + }, + "outputs": [], + "source": [ + "FLOWERS_DIR = './flower_photos'\n", + "TRAIN_FRACTION = 0.8\n", + "RANDOM_SEED = 2018\n", + "\n", + "\n", + "def download_images():\n", + " \"\"\"If the images aren't already downloaded, save them to FLOWERS_DIR.\"\"\"\n", + " if not os.path.exists(FLOWERS_DIR):\n", + " DOWNLOAD_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'\n", + " print('Downloading flower images from %s...' 
% DOWNLOAD_URL)\n", + " urllib.request.urlretrieve(DOWNLOAD_URL, 'flower_photos.tgz')\n", + " !tar xfz flower_photos.tgz\n", + " print('Flower photos are located in %s' % FLOWERS_DIR)\n", + "\n", + "\n", + "def make_train_and_test_sets():\n", + " \"\"\"Split the data into train and test sets and get the label classes.\"\"\"\n", + " train_examples, test_examples = [], []\n", + " shuffler = random.Random(RANDOM_SEED)\n", + " is_root = True\n", + " for (dirname, subdirs, filenames) in tf.gfile.Walk(FLOWERS_DIR):\n", + " # The root directory gives us the classes\n", + " if is_root:\n", + " subdirs = sorted(subdirs)\n", + " classes = collections.OrderedDict(enumerate(subdirs))\n", + " label_to_class = dict([(x, i) for i, x in enumerate(subdirs)])\n", + " is_root = False\n", + " # The sub directories give us the image files for training.\n", + " else:\n", + " filenames.sort()\n", + " shuffler.shuffle(filenames)\n", + " full_filenames = [os.path.join(dirname, f) for f in filenames]\n", + " label = dirname.split('/')[-1]\n", + " label_class = label_to_class[label]\n", + " # An example is the image file and it's label class.\n", + " examples = list(zip(full_filenames, [label_class] * len(filenames)))\n", + " num_train = int(len(filenames) * TRAIN_FRACTION)\n", + " train_examples.extend(examples[:num_train])\n", + " test_examples.extend(examples[num_train:])\n", + "\n", + " shuffler.shuffle(train_examples)\n", + " shuffler.shuffle(test_examples)\n", + " return train_examples, test_examples, classes\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_9NklpcANhtB" + }, + "outputs": [], + "source": [ + "# Download the images and split the images into train and test sets.\n", + "download_images()\n", + "TRAIN_EXAMPLES, TEST_EXAMPLES, CLASSES = make_train_and_test_sets()\n", + "NUM_CLASSES = len(CLASSES)\n", + "\n", + "print('\\nThe dataset has %d label classes: %s' % (NUM_CLASSES, CLASSES.values()))\n", + "print('There are %d training images' % len(TRAIN_EXAMPLES))\n", + "print('there are %d test images' % len(TEST_EXAMPLES))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tHF7bHTfnD6S" + }, + "source": [ + "## Explore the data\n", + "\n", + "The flowers dataset consists of examples which are labeled images of flowers. Each example contains a JPEG flower image and the class label: what type of flower it is. Let's display a few images together with their labels." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "1friUvN6kPYM" + }, + "outputs": [], + "source": [ + "#@title Show some labeled images\n", + "def get_label(example):\n", + " \"\"\"Get the label (number) for given example.\"\"\"\n", + " return example[1]\n", + "\n", + "def get_class(example):\n", + " \"\"\"Get the class (string) of given example.\"\"\"\n", + " return CLASSES[get_label(example)]\n", + "\n", + "def get_encoded_image(example):\n", + " \"\"\"Get the image data (encoded jpg) of given example.\"\"\"\n", + " image_path = example[0]\n", + " return tf.gfile.GFile(image_path, 'rb').read()\n", + "\n", + "def get_image(example):\n", + " \"\"\"Get image as np.array of pixels for given example.\"\"\"\n", + " return plt.imread(io.BytesIO(get_encoded_image(example)), format='jpg')\n", + "\n", + "def display_images(images_and_classes, cols=5):\n", + " \"\"\"Display given images and their labels in a grid.\"\"\"\n", + " rows = int(math.ceil(len(images_and_classes) / cols))\n", + " fig = plt.figure()\n", + " fig.set_size_inches(cols * 3, rows * 3)\n", + " for i, (image, flower_class) in enumerate(images_and_classes):\n", + " plt.subplot(rows, cols, i + 1)\n", + " plt.axis('off')\n", + " plt.imshow(image)\n", + " plt.title(flower_class)\n", + "\n", + "NUM_IMAGES = 15 #@param {type: 'integer'}\n", + "display_images([(get_image(example), get_class(example))\n", + " for example in TRAIN_EXAMPLES[:NUM_IMAGES]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hyjr6PuboTAg" + }, + "source": [ + "## Build the model\n", + "\n", + "We will load a [TF-Hub](https://tensorflow.org/hub) image feature vector module, stack a linear classifier on it, and add training and evaluation ops. The following cell builds a TF graph describing the model and its training, but it doesn't run the training (that will be the next step)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbkSRaK_oW5Y" + }, + "outputs": [], + "source": [ + "LEARNING_RATE = 0.01\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "# Load a pre-trained TF-Hub module for extracting features from images. We've\n", + "# chosen this particular module for speed, but many other choices are available.\n", + "image_module = hub.Module('https://tfhub.dev/google/imagenet/mobilenet_v2_035_128/feature_vector/2')\n", + "\n", + "# Preprocessing images into tensors with size expected by the image module.\n", + "encoded_images = tf.placeholder(tf.string, shape=[None])\n", + "image_size = hub.get_expected_image_size(image_module)\n", + "\n", + "\n", + "def decode_and_resize_image(encoded):\n", + " decoded = tf.image.decode_jpeg(encoded, channels=3)\n", + " decoded = tf.image.convert_image_dtype(decoded, tf.float32)\n", + " return tf.image.resize_images(decoded, image_size)\n", + "\n", + "\n", + "batch_images = tf.map_fn(decode_and_resize_image, encoded_images, dtype=tf.float32)\n", + "\n", + "# The image module can be applied as a function to extract feature vectors for a\n", + "# batch of images.\n", + "features = image_module(batch_images)\n", + "\n", + "\n", + "def create_model(features):\n", + " \"\"\"Build a model for classification from extracted features.\"\"\"\n", + " # Currently, the model is just a single linear layer. You can try to add\n", + " # another layer, but be careful... two linear layers (when activation=None)\n", + " # are equivalent to a single linear layer. 
You can create a nonlinear layer\n", + " # like this:\n", + " # layer = tf.layers.dense(inputs=..., units=..., activation=tf.nn.relu)\n", + " layer = tf.layers.dense(inputs=features, units=NUM_CLASSES, activation=None)\n", + " return layer\n", + "\n", + "\n", + "# For each class (kind of flower), the model outputs some real number as a score\n", + "# how much the input resembles this class. This vector of numbers is often\n", + "# called the \"logits\".\n", + "logits = create_model(features)\n", + "labels = tf.placeholder(tf.float32, [None, NUM_CLASSES])\n", + "\n", + "# Mathematically, a good way to measure how much the predicted probabilities\n", + "# diverge from the truth is the \"cross-entropy\" between the two probability\n", + "# distributions. For numerical stability, this is best done directly from the\n", + "# logits, not the probabilities extracted from them.\n", + "cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)\n", + "cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", + "\n", + "# Let's add an optimizer so we can train the network.\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)\n", + "train_op = optimizer.minimize(loss=cross_entropy_mean)\n", + "\n", + "# The \"softmax\" function transforms the logits vector into a vector of\n", + "# probabilities: non-negative numbers that sum up to one, and the i-th number\n", + "# says how likely the input comes from class i.\n", + "probabilities = tf.nn.softmax(logits)\n", + "\n", + "# We choose the highest one as the predicted class.\n", + "prediction = tf.argmax(probabilities, 1)\n", + "correct_prediction = tf.equal(prediction, tf.argmax(labels, 1))\n", + "\n", + "# The accuracy will allow us to eval on our test set. \n", + "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vvhYQ7-3AG_" + }, + "source": [ + "## Train the network\n", + "\n", + "Now that our model is built, let's train it and see how it performs on our test set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1YnBg7-OS3Dz" + }, + "outputs": [], + "source": [ + "# How long will we train the network (number of batches).\n", + "NUM_TRAIN_STEPS = 100 #@param {type: 'integer'}\n", + "# How many training examples we use in each step.\n", + "TRAIN_BATCH_SIZE = 10 #@param {type: 'integer'}\n", + "# How often to evaluate the model performance.\n", + "EVAL_EVERY = 10 #@param {type: 'integer'}\n", + "\n", + "def get_batch(batch_size=None, test=False):\n", + " \"\"\"Get a random batch of examples.\"\"\"\n", + " examples = TEST_EXAMPLES if test else TRAIN_EXAMPLES\n", + " batch_examples = random.sample(examples, batch_size) if batch_size else examples\n", + " return batch_examples\n", + "\n", + "def get_images_and_labels(batch_examples):\n", + " images = [get_encoded_image(e) for e in batch_examples]\n", + " one_hot_labels = [get_label_one_hot(e) for e in batch_examples]\n", + " return images, one_hot_labels\n", + "\n", + "def get_label_one_hot(example):\n", + " \"\"\"Get the one hot encoding vector for the example.\"\"\"\n", + " one_hot_vector = np.zeros(NUM_CLASSES)\n", + " np.put(one_hot_vector, get_label(example), 1)\n", + " return one_hot_vector\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(tf.global_variables_initializer())\n", + " for i in range(NUM_TRAIN_STEPS):\n", + " # Get a random batch of training examples.\n", + " train_batch = get_batch(batch_size=TRAIN_BATCH_SIZE)\n", + " batch_images, batch_labels = get_images_and_labels(train_batch)\n", + " # Run the train_op to train the model.\n", + " train_loss, _, train_accuracy = sess.run(\n", + " [cross_entropy_mean, train_op, accuracy],\n", + " feed_dict={encoded_images: batch_images, labels: batch_labels})\n", + " is_final_step = (i == (NUM_TRAIN_STEPS - 1))\n", + " if i % EVAL_EVERY == 0 or is_final_step:\n", + " # Get a batch of test examples.\n", + " test_batch = get_batch(batch_size=None, test=True)\n", + " batch_images, batch_labels = get_images_and_labels(test_batch)\n", + " # Evaluate how well our model performs on the test set.\n", + " test_loss, test_accuracy, test_prediction, correct_predicate = sess.run(\n", + " [cross_entropy_mean, accuracy, prediction, correct_prediction],\n", + " feed_dict={encoded_images: batch_images, labels: batch_labels})\n", + " print('Test accuracy at step %s: %.2f%%' % (i, (test_accuracy * 100)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZFUNJxuH2t0V" + }, + "outputs": [], + "source": [ + "def show_confusion_matrix(test_labels, predictions):\n", + " \"\"\"Compute confusion matrix and normalize.\"\"\"\n", + " confusion = sk_metrics.confusion_matrix(\n", + " np.argmax(test_labels, axis=1), predictions)\n", + " confusion_normalized = confusion.astype(\"float\") / confusion.sum(axis=1)\n", + " axis_labels = list(CLASSES.values())\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.2f', square=True)\n", + " plt.title(\"Confusion matrix\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "show_confusion_matrix(batch_labels, test_prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uu3vo8DK8BdL" + }, + "source": [ + "## Incorrect predictions\n", + "\n", + "Let's a take a closer look at the test examples that our model got wrong.\n", + "\n", + "- Are there any mislabeled examples in our test set?\n", + "- Is there any 
bad data in the test set - images that aren't actually pictures of flowers?\n", + "- Are there images where you can understand why the model made a mistake?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hqa0V3WN8C9M" + }, + "outputs": [], + "source": [ + "incorrect = [\n", + " (example, CLASSES[prediction])\n", + " for example, prediction, is_correct in zip(test_batch, test_prediction, correct_predicate)\n", + " if not is_correct\n", + "]\n", + "display_images(\n", + " [(get_image(example), \"prediction: {0}\\nlabel:{1}\".format(incorrect_prediction, get_class(example)))\n", + " for (example, incorrect_prediction) in incorrect[:20]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YN_s04Il8TvK" + }, + "source": [ + "## Exercises: Improve the model!\n", + "\n", + "We've trained a baseline model, now let's try to improve it to achieve better accuracy. (Remember that you'll need to re-run the cells when you make a change.)\n", + "\n", + "### Exercise 1: Try a different image model.\n", + "With TF-Hub, trying a few different image models is simple. Just replace the `\"https://tfhub.dev/google/imagenet/mobilenet_v2_050_128/feature_vector/2\"` handle in the `hub.Module()` call with a handle of different module and rerun all the code. You can see all available image modules at [tfhub.dev](https://tfhub.dev/s?module-type=image-feature-vector). \n", + "\n", + "A good choice might be one of the other [MobileNet V2 modules](https://tfhub.dev/s?module-type=image-feature-vector&network-architecture=mobilenet-v2). Many of the modules -- including the MobileNet modules -- were trained on the [ImageNet dataset](https://www.tensorflow.org/datasets/catalog/imagenet2012) which contains over 1 million images and 1000 classes. Choosing a network architecture provides a tradeoff between speed and classification accuracy: models like MobileNet or NASNet Mobile are fast and small, more traditional architectures like Inception and ResNet were designed for accuracy.\n", + "\n", + "For the larger Inception V3 architecture, you can also explore the benefits of pre-training on a domain closer to your own task: it is also available as a [module trained on the iNaturalist dataset](https://tfhub.dev/google/inaturalist/inception_v3/feature_vector/1) of plants and animals.\n", + "\n", + "### Exercise 2: Add a hidden layer.\n", + "Stack a hidden layer between extracted image features and the linear classifier (in function `create_model()` above). To create a non-linear hidden layer with e.g. 100 nodes, use [tf.layers.dense](https://www.tensorflow.org/api_docs/python/tf/compat/v1/layers/dense) with units set to 100 and activation set to `tf.nn.relu`. Does changing the size of the hidden layer affect the test accuracy? Does adding second hidden layer improve the accuracy?\n", + "\n", + "### Exercise 3: Change hyperparameters.\n", + "Does increasing *number of training steps* improves final accuracy? Can you *change the learning rate* to make your model converge more quickly? Does the training *batch size* affect your model's performance?\n", + "\n", + "### Exercise 4: Try a different optimizer.\n", + "\n", + "Replace the basic GradientDescentOptimizer with a more sophisticate optimizer, e.g. [AdagradOptimizer](https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdagradOptimizer). Does it make a difference to your model training? 
If you want to learn more about the benefits of different optimization algorithms, check out [this post](http://ruder.io/optimizing-gradient-descent/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kdwVXO1eJS5-" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "If you are interested in a more advanced version of this tutorial, check out the [TensorFlow image retraining tutorial](https://www.tensorflow.org/hub/tutorials/image_retraining) which walks you through visualizing the training using TensorBoard, advanced techniques like dataset augmentation by distorting images, and replacing the flowers dataset to learn an image classifier on your own dataset.\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](http://tensorflow.org) and see the TF-Hub API documentation is available at [tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub modules at [tfhub.dev](http://tfhub.dev) including more image feature vector modules and text embedding modules.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ScitaPqhKtuW" + ], + "name": "image_feature_vector.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/movenet.ipynb b/site/en/hub/tutorials/movenet.ipynb new file mode 100644 index 00000000000..f7955a5253b --- /dev/null +++ b/site/en/hub/tutorials/movenet.ipynb @@ -0,0 +1,816 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "toCy3v03Dwx7" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QKe-ubNcDvgv" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KqtQzBCpIJ7Y" + }, + "source": [ + "# MoveNet: Ultra fast and accurate pose detection model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MCmFOosnSkCd" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6x99e0aEY_d6" + }, + "source": [ + "**[MoveNet](https://t.co/QpfnVL0YYI?amp=1)** is an ultra fast and accurate model that detects 17 keypoints of a body. The model is offered on [TF Hub](https://tfhub.dev/s?q=movenet) with two variants, known as Lightning and Thunder. Lightning is intended for latency-critical applications, while Thunder is intended for applications that require high accuracy. Both models run faster than real time (30+ FPS) on most modern desktops, laptops, and phones, which proves crucial for live fitness, health, and wellness applications.\n", + "\n", + "\n", + "\"drawing\"/\n", + "\n", + "*Images downloaded from Pexels (https://www.pexels.com/)\n", + "\n", + "This Colab walks you through the details of how to load MoveNet, and run inference on the input image and video below.\n", + "\n", + "Note: check out the [live demo](https://storage.googleapis.com/tfjs-models/demos/pose-detection/index.html?model=movenet) for how the model works!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "10_zkgbZBkIE" + }, + "source": [ + "# Human Pose Estimation with MoveNet" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9u_VGR6_BmbZ" + }, + "source": [ + "## Visualization libraries & Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TtcwSIcgbIVN" + }, + "outputs": [], + "source": [ + "!pip install -q imageio\n", + "!pip install -q opencv-python\n", + "!pip install -q git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9BLeJv-pCCld" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "import numpy as np\n", + "import cv2\n", + "\n", + "# Import matplotlib libraries\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.collections import LineCollection\n", + "import matplotlib.patches as patches\n", + "\n", + "# Some modules to display an animation using imageio.\n", + "import imageio\n", + "from IPython.display import HTML, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "bEJBMeRb3YUy" + }, + "outputs": [], + "source": [ + "#@title Helper functions for visualization\n", + "\n", + "# Dictionary that maps from joint names to keypoint indices.\n", + "KEYPOINT_DICT = {\n", + " 'nose': 0,\n", + " 'left_eye': 1,\n", + " 'right_eye': 2,\n", + " 'left_ear': 3,\n", + " 'right_ear': 4,\n", + " 'left_shoulder': 5,\n", + " 'right_shoulder': 6,\n", + " 'left_elbow': 7,\n", + " 'right_elbow': 8,\n", + " 'left_wrist': 9,\n", + " 'right_wrist': 10,\n", + " 'left_hip': 11,\n", + " 'right_hip': 12,\n", + " 'left_knee': 13,\n", + " 'right_knee': 14,\n", + " 'left_ankle': 15,\n", + " 'right_ankle': 16\n", + "}\n", + "\n", + "# Maps bones to a matplotlib color name.\n", + "KEYPOINT_EDGE_INDS_TO_COLOR = {\n", + " (0, 1): 'm',\n", + " (0, 2): 'c',\n", + " (1, 3): 'm',\n", + " (2, 4): 'c',\n", + " (0, 5): 'm',\n", + " (0, 6): 'c',\n", + " (5, 7): 'm',\n", + " (7, 9): 'm',\n", + " (6, 8): 'c',\n", + " (8, 10): 'c',\n", + " (5, 6): 'y',\n", + " (5, 11): 'm',\n", + " (6, 12): 'c',\n", + " (11, 12): 'y',\n", + " (11, 13): 'm',\n", + " (13, 15): 'm',\n", + " (12, 14): 'c',\n", + " (14, 16): 'c'\n", + "}\n", + "\n", + "def _keypoints_and_edges_for_display(keypoints_with_scores,\n", + " height,\n", + " 
width,\n", + " keypoint_threshold=0.11):\n", + " \"\"\"Returns high confidence keypoints and edges for visualization.\n", + "\n", + " Args:\n", + " keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing\n", + " the keypoint coordinates and scores returned from the MoveNet model.\n", + " height: height of the image in pixels.\n", + " width: width of the image in pixels.\n", + " keypoint_threshold: minimum confidence score for a keypoint to be\n", + " visualized.\n", + "\n", + " Returns:\n", + " A (keypoints_xy, edges_xy, edge_colors) containing:\n", + " * the coordinates of all keypoints of all detected entities;\n", + " * the coordinates of all skeleton edges of all detected entities;\n", + " * the colors in which the edges should be plotted.\n", + " \"\"\"\n", + " keypoints_all = []\n", + " keypoint_edges_all = []\n", + " edge_colors = []\n", + " num_instances, _, _, _ = keypoints_with_scores.shape\n", + " for idx in range(num_instances):\n", + " kpts_x = keypoints_with_scores[0, idx, :, 1]\n", + " kpts_y = keypoints_with_scores[0, idx, :, 0]\n", + " kpts_scores = keypoints_with_scores[0, idx, :, 2]\n", + " kpts_absolute_xy = np.stack(\n", + " [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)\n", + " kpts_above_thresh_absolute = kpts_absolute_xy[\n", + " kpts_scores > keypoint_threshold, :]\n", + " keypoints_all.append(kpts_above_thresh_absolute)\n", + "\n", + " for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():\n", + " if (kpts_scores[edge_pair[0]] > keypoint_threshold and\n", + " kpts_scores[edge_pair[1]] > keypoint_threshold):\n", + " x_start = kpts_absolute_xy[edge_pair[0], 0]\n", + " y_start = kpts_absolute_xy[edge_pair[0], 1]\n", + " x_end = kpts_absolute_xy[edge_pair[1], 0]\n", + " y_end = kpts_absolute_xy[edge_pair[1], 1]\n", + " line_seg = np.array([[x_start, y_start], [x_end, y_end]])\n", + " keypoint_edges_all.append(line_seg)\n", + " edge_colors.append(color)\n", + " if keypoints_all:\n", + " keypoints_xy = np.concatenate(keypoints_all, axis=0)\n", + " else:\n", + " keypoints_xy = np.zeros((0, 17, 2))\n", + "\n", + " if keypoint_edges_all:\n", + " edges_xy = np.stack(keypoint_edges_all, axis=0)\n", + " else:\n", + " edges_xy = np.zeros((0, 2, 2))\n", + " return keypoints_xy, edges_xy, edge_colors\n", + "\n", + "\n", + "def draw_prediction_on_image(\n", + " image, keypoints_with_scores, crop_region=None, close_figure=False,\n", + " output_image_height=None):\n", + " \"\"\"Draws the keypoint predictions on image.\n", + "\n", + " Args:\n", + " image: A numpy array with shape [height, width, channel] representing the\n", + " pixel values of the input image.\n", + " keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing\n", + " the keypoint coordinates and scores returned from the MoveNet model.\n", + " crop_region: A dictionary that defines the coordinates of the bounding box\n", + " of the crop region in normalized coordinates (see the init_crop_region\n", + " function below for more detail). 
If provided, this function will also\n", + " draw the bounding box on the image.\n", + " output_image_height: An integer indicating the height of the output image.\n", + " Note that the image aspect ratio will be the same as the input image.\n", + "\n", + " Returns:\n", + " A numpy array with shape [out_height, out_width, channel] representing the\n", + " image overlaid with keypoint predictions.\n", + " \"\"\"\n", + " height, width, channel = image.shape\n", + " aspect_ratio = float(width) / height\n", + " fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))\n", + " # To remove the huge white borders\n", + " fig.tight_layout(pad=0)\n", + " ax.margins(0)\n", + " ax.set_yticklabels([])\n", + " ax.set_xticklabels([])\n", + " plt.axis('off')\n", + "\n", + " im = ax.imshow(image)\n", + " line_segments = LineCollection([], linewidths=(4), linestyle='solid')\n", + " ax.add_collection(line_segments)\n", + " # Turn off tick labels\n", + " scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)\n", + "\n", + " (keypoint_locs, keypoint_edges,\n", + " edge_colors) = _keypoints_and_edges_for_display(\n", + " keypoints_with_scores, height, width)\n", + "\n", + " line_segments.set_segments(keypoint_edges)\n", + " line_segments.set_color(edge_colors)\n", + " if keypoint_edges.shape[0]:\n", + " line_segments.set_segments(keypoint_edges)\n", + " line_segments.set_color(edge_colors)\n", + " if keypoint_locs.shape[0]:\n", + " scat.set_offsets(keypoint_locs)\n", + "\n", + " if crop_region is not None:\n", + " xmin = max(crop_region['x_min'] * width, 0.0)\n", + " ymin = max(crop_region['y_min'] * height, 0.0)\n", + " rec_width = min(crop_region['x_max'], 0.99) * width - xmin\n", + " rec_height = min(crop_region['y_max'], 0.99) * height - ymin\n", + " rect = patches.Rectangle(\n", + " (xmin,ymin),rec_width,rec_height,\n", + " linewidth=1,edgecolor='b',facecolor='none')\n", + " ax.add_patch(rect)\n", + "\n", + " fig.canvas.draw()\n", + " image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)\n", + " image_from_plot = image_from_plot.reshape(\n", + " fig.canvas.get_width_height()[::-1] + (3,))\n", + " plt.close(fig)\n", + " if output_image_height is not None:\n", + " output_image_width = int(output_image_height / height * width)\n", + " image_from_plot = cv2.resize(\n", + " image_from_plot, dsize=(output_image_width, output_image_height),\n", + " interpolation=cv2.INTER_CUBIC)\n", + " return image_from_plot\n", + "\n", + "def to_gif(images, duration):\n", + " \"\"\"Converts image sequence (4D numpy array) to gif.\"\"\"\n", + " imageio.mimsave('./animation.gif', images, duration=duration)\n", + " return embed.embed_file('./animation.gif')\n", + "\n", + "def progress(value, max=100):\n", + " return HTML(\"\"\"\n", + " \n", + " {value}\n", + " \n", + " \"\"\".format(value=value, max=max))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UvrN0iQiOxhR" + }, + "source": [ + "## Load Model from TF hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zeGHgANcT7a1" + }, + "outputs": [], + "source": [ + "model_name = \"movenet_lightning\" #@param [\"movenet_lightning\", \"movenet_thunder\", \"movenet_lightning_f16.tflite\", \"movenet_thunder_f16.tflite\", \"movenet_lightning_int8.tflite\", \"movenet_thunder_int8.tflite\"]\n", + "\n", + "if \"tflite\" in model_name:\n", + " if \"movenet_lightning_f16\" in model_name:\n", + " !wget -q -O model.tflite 
https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite\n", + " input_size = 192\n", + " elif \"movenet_thunder_f16\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite\n", + " input_size = 256\n", + " elif \"movenet_lightning_int8\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite\n", + " input_size = 192\n", + " elif \"movenet_thunder_int8\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite\n", + " input_size = 256\n", + " else:\n", + " raise ValueError(\"Unsupported model name: %s\" % model_name)\n", + "\n", + " # Initialize the TFLite interpreter\n", + " interpreter = tf.lite.Interpreter(model_path=\"model.tflite\")\n", + " interpreter.allocate_tensors()\n", + "\n", + " def movenet(input_image):\n", + " \"\"\"Runs detection on an input image.\n", + "\n", + " Args:\n", + " input_image: A [1, height, width, 3] tensor represents the input image\n", + " pixels. Note that the height/width should already be resized and match the\n", + " expected input resolution of the model before passing into this function.\n", + "\n", + " Returns:\n", + " A [1, 1, 17, 3] float numpy array representing the predicted keypoint\n", + " coordinates and scores.\n", + " \"\"\"\n", + " # TF Lite format expects tensor type of uint8.\n", + " input_image = tf.cast(input_image, dtype=tf.uint8)\n", + " input_details = interpreter.get_input_details()\n", + " output_details = interpreter.get_output_details()\n", + " interpreter.set_tensor(input_details[0]['index'], input_image.numpy())\n", + " # Invoke inference.\n", + " interpreter.invoke()\n", + " # Get the model prediction.\n", + " keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])\n", + " return keypoints_with_scores\n", + "\n", + "else:\n", + " if \"movenet_lightning\" in model_name:\n", + " module = hub.load(\"https://tfhub.dev/google/movenet/singlepose/lightning/4\")\n", + " input_size = 192\n", + " elif \"movenet_thunder\" in model_name:\n", + " module = hub.load(\"https://tfhub.dev/google/movenet/singlepose/thunder/4\")\n", + " input_size = 256\n", + " else:\n", + " raise ValueError(\"Unsupported model name: %s\" % model_name)\n", + "\n", + " def movenet(input_image):\n", + " \"\"\"Runs detection on an input image.\n", + "\n", + " Args:\n", + " input_image: A [1, height, width, 3] tensor represents the input image\n", + " pixels. 
Note that the height/width should already be resized and match the\n", + " expected input resolution of the model before passing into this function.\n", + "\n", + " Returns:\n", + " A [1, 1, 17, 3] float numpy array representing the predicted keypoint\n", + " coordinates and scores.\n", + " \"\"\"\n", + " model = module.signatures['serving_default']\n", + "\n", + " # SavedModel format expects tensor type of int32.\n", + " input_image = tf.cast(input_image, dtype=tf.int32)\n", + " # Run model inference.\n", + " outputs = model(input_image)\n", + " # Output is a [1, 1, 17, 3] tensor.\n", + " keypoints_with_scores = outputs['output_0'].numpy()\n", + " return keypoints_with_scores" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-h1qHYaqD9ap" + }, + "source": [ + "## Single Image Example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ymTVR2I9x22I" + }, + "source": [ + "This section demonstrates the minimum working example of running the model on a **single image** to predict the 17 human keypoints." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5I3xBq80E3N_" + }, + "source": [ + "### Load Input Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GMO4B-wx5psP" + }, + "outputs": [], + "source": [ + "!curl -o input_image.jpeg https://images.pexels.com/photos/4384679/pexels-photo-4384679.jpeg --silent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lJZYQ8KYFQ6x" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "image_path = 'input_image.jpeg'\n", + "image = tf.io.read_file(image_path)\n", + "image = tf.image.decode_jpeg(image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S_UWRdQxE6WN" + }, + "source": [ + "### Run Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VHmTwACwFW-v" + }, + "outputs": [], + "source": [ + "# Resize and pad the image to keep the aspect ratio and fit the expected size.\n", + "input_image = tf.expand_dims(image, axis=0)\n", + "input_image = tf.image.resize_with_pad(input_image, input_size, input_size)\n", + "\n", + "# Run model inference.\n", + "keypoints_with_scores = movenet(input_image)\n", + "\n", + "# Visualize the predictions with image.\n", + "display_image = tf.expand_dims(image, axis=0)\n", + "display_image = tf.cast(tf.image.resize_with_pad(\n", + " display_image, 1280, 1280), dtype=tf.int32)\n", + "output_overlay = draw_prediction_on_image(\n", + " np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)\n", + "\n", + "plt.figure(figsize=(5, 5))\n", + "plt.imshow(output_overlay)\n", + "_ = plt.axis('off')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rKm-B0eMYeg8" + }, + "source": [ + "## Video (Image Sequence) Example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gdPFXabLyiKv" + }, + "source": [ + "This section demonstrates how to apply intelligent cropping based on detections from the previous frame when the input is a sequence of frames. 
This allows the model to devote its attention and resources to the main subject, resulting in much better prediction quality without sacrificing the speed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "SYFdK-JHYhrv" + }, + "outputs": [], + "source": [ + "#@title Cropping Algorithm\n", + "\n", + "# Confidence score to determine whether a keypoint prediction is reliable.\n", + "MIN_CROP_KEYPOINT_SCORE = 0.2\n", + "\n", + "def init_crop_region(image_height, image_width):\n", + " \"\"\"Defines the default crop region.\n", + "\n", + " The function provides the initial crop region (pads the full image from both\n", + " sides to make it a square image) when the algorithm cannot reliably determine\n", + " the crop region from the previous frame.\n", + " \"\"\"\n", + " if image_width > image_height:\n", + " box_height = image_width / image_height\n", + " box_width = 1.0\n", + " y_min = (image_height / 2 - image_width / 2) / image_height\n", + " x_min = 0.0\n", + " else:\n", + " box_height = 1.0\n", + " box_width = image_height / image_width\n", + " y_min = 0.0\n", + " x_min = (image_width / 2 - image_height / 2) / image_width\n", + "\n", + " return {\n", + " 'y_min': y_min,\n", + " 'x_min': x_min,\n", + " 'y_max': y_min + box_height,\n", + " 'x_max': x_min + box_width,\n", + " 'height': box_height,\n", + " 'width': box_width\n", + " }\n", + "\n", + "def torso_visible(keypoints):\n", + " \"\"\"Checks whether there are enough torso keypoints.\n", + "\n", + " This function checks whether the model is confident at predicting one of the\n", + " shoulders/hips which is required to determine a good crop region.\n", + " \"\"\"\n", + " return ((keypoints[0, 0, KEYPOINT_DICT['left_hip'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE or\n", + " keypoints[0, 0, KEYPOINT_DICT['right_hip'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE) and\n", + " (keypoints[0, 0, KEYPOINT_DICT['left_shoulder'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE or\n", + " keypoints[0, 0, KEYPOINT_DICT['right_shoulder'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE))\n", + "\n", + "def determine_torso_and_body_range(\n", + " keypoints, target_keypoints, center_y, center_x):\n", + " \"\"\"Calculates the maximum distance from each keypoints to the center location.\n", + "\n", + " The function returns the maximum distances from the two sets of keypoints:\n", + " full 17 keypoints and 4 torso keypoints. The returned information will be\n", + " used to determine the crop size. 
See determineCropRegion for more detail.\n", + " \"\"\"\n", + " torso_joints = ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']\n", + " max_torso_yrange = 0.0\n", + " max_torso_xrange = 0.0\n", + " for joint in torso_joints:\n", + " dist_y = abs(center_y - target_keypoints[joint][0])\n", + " dist_x = abs(center_x - target_keypoints[joint][1])\n", + " if dist_y > max_torso_yrange:\n", + " max_torso_yrange = dist_y\n", + " if dist_x > max_torso_xrange:\n", + " max_torso_xrange = dist_x\n", + "\n", + " max_body_yrange = 0.0\n", + " max_body_xrange = 0.0\n", + " for joint in KEYPOINT_DICT.keys():\n", + " if keypoints[0, 0, KEYPOINT_DICT[joint], 2] < MIN_CROP_KEYPOINT_SCORE:\n", + " continue\n", + " dist_y = abs(center_y - target_keypoints[joint][0]);\n", + " dist_x = abs(center_x - target_keypoints[joint][1]);\n", + " if dist_y > max_body_yrange:\n", + " max_body_yrange = dist_y\n", + "\n", + " if dist_x > max_body_xrange:\n", + " max_body_xrange = dist_x\n", + "\n", + " return [max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange]\n", + "\n", + "def determine_crop_region(\n", + " keypoints, image_height,\n", + " image_width):\n", + " \"\"\"Determines the region to crop the image for the model to run inference on.\n", + "\n", + " The algorithm uses the detected joints from the previous frame to estimate\n", + " the square region that encloses the full body of the target person and\n", + " centers at the midpoint of two hip joints. The crop size is determined by\n", + " the distances between each joints and the center point.\n", + " When the model is not confident with the four torso joint predictions, the\n", + " function returns a default crop which is the full image padded to square.\n", + " \"\"\"\n", + " target_keypoints = {}\n", + " for joint in KEYPOINT_DICT.keys():\n", + " target_keypoints[joint] = [\n", + " keypoints[0, 0, KEYPOINT_DICT[joint], 0] * image_height,\n", + " keypoints[0, 0, KEYPOINT_DICT[joint], 1] * image_width\n", + " ]\n", + "\n", + " if torso_visible(keypoints):\n", + " center_y = (target_keypoints['left_hip'][0] +\n", + " target_keypoints['right_hip'][0]) / 2;\n", + " center_x = (target_keypoints['left_hip'][1] +\n", + " target_keypoints['right_hip'][1]) / 2;\n", + "\n", + " (max_torso_yrange, max_torso_xrange,\n", + " max_body_yrange, max_body_xrange) = determine_torso_and_body_range(\n", + " keypoints, target_keypoints, center_y, center_x)\n", + "\n", + " crop_length_half = np.amax(\n", + " [max_torso_xrange * 1.9, max_torso_yrange * 1.9,\n", + " max_body_yrange * 1.2, max_body_xrange * 1.2])\n", + "\n", + " tmp = np.array(\n", + " [center_x, image_width - center_x, center_y, image_height - center_y])\n", + " crop_length_half = np.amin(\n", + " [crop_length_half, np.amax(tmp)]);\n", + "\n", + " crop_corner = [center_y - crop_length_half, center_x - crop_length_half];\n", + "\n", + " if crop_length_half > max(image_width, image_height) / 2:\n", + " return init_crop_region(image_height, image_width)\n", + " else:\n", + " crop_length = crop_length_half * 2;\n", + " return {\n", + " 'y_min': crop_corner[0] / image_height,\n", + " 'x_min': crop_corner[1] / image_width,\n", + " 'y_max': (crop_corner[0] + crop_length) / image_height,\n", + " 'x_max': (crop_corner[1] + crop_length) / image_width,\n", + " 'height': (crop_corner[0] + crop_length) / image_height -\n", + " crop_corner[0] / image_height,\n", + " 'width': (crop_corner[1] + crop_length) / image_width -\n", + " crop_corner[1] / image_width\n", + " }\n", + " else:\n", + " return 
init_crop_region(image_height, image_width)\n", + "\n", + "def crop_and_resize(image, crop_region, crop_size):\n", + " \"\"\"Crops and resize the image to prepare for the model input.\"\"\"\n", + " boxes=[[crop_region['y_min'], crop_region['x_min'],\n", + " crop_region['y_max'], crop_region['x_max']]]\n", + " output_image = tf.image.crop_and_resize(\n", + " image, box_indices=[0], boxes=boxes, crop_size=crop_size)\n", + " return output_image\n", + "\n", + "def run_inference(movenet, image, crop_region, crop_size):\n", + " \"\"\"Runs model inference on the cropped region.\n", + "\n", + " The function runs the model inference on the cropped region and updates the\n", + " model output to the original image coordinate system.\n", + " \"\"\"\n", + " image_height, image_width, _ = image.shape\n", + " input_image = crop_and_resize(\n", + " tf.expand_dims(image, axis=0), crop_region, crop_size=crop_size)\n", + " # Run model inference.\n", + " keypoints_with_scores = movenet(input_image)\n", + " # Update the coordinates.\n", + " for idx in range(17):\n", + " keypoints_with_scores[0, 0, idx, 0] = (\n", + " crop_region['y_min'] * image_height +\n", + " crop_region['height'] * image_height *\n", + " keypoints_with_scores[0, 0, idx, 0]) / image_height\n", + " keypoints_with_scores[0, 0, idx, 1] = (\n", + " crop_region['x_min'] * image_width +\n", + " crop_region['width'] * image_width *\n", + " keypoints_with_scores[0, 0, idx, 1]) / image_width\n", + " return keypoints_with_scores" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L2JmA1xAEntQ" + }, + "source": [ + "### Load Input Image Sequence" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CzJxbxDckWl2" + }, + "outputs": [], + "source": [ + "!wget -q -O dance.gif https://github.com/tensorflow/tfjs-models/raw/master/pose-detection/assets/dance_input.gif" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IxbMFZJUkd6W" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "image_path = 'dance.gif'\n", + "image = tf.io.read_file(image_path)\n", + "image = tf.image.decode_gif(image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CJKeQ4siEtU9" + }, + "source": [ + "### Run Inference with Cropping Algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9B57XS0NZPIy" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "num_frames, image_height, image_width, _ = image.shape\n", + "crop_region = init_crop_region(image_height, image_width)\n", + "\n", + "output_images = []\n", + "bar = display(progress(0, num_frames-1), display_id=True)\n", + "for frame_idx in range(num_frames):\n", + " keypoints_with_scores = run_inference(\n", + " movenet, image[frame_idx, :, :, :], crop_region,\n", + " crop_size=[input_size, input_size])\n", + " output_images.append(draw_prediction_on_image(\n", + " image[frame_idx, :, :, :].numpy().astype(np.int32),\n", + " keypoints_with_scores, crop_region=None,\n", + " close_figure=True, output_image_height=300))\n", + " crop_region = determine_crop_region(\n", + " keypoints_with_scores, image_height, image_width)\n", + " bar.update(progress(frame_idx, num_frames-1))\n", + "\n", + "# Prepare gif visualization.\n", + "output = np.stack(output_images, axis=0)\n", + "to_gif(output, duration=100)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "9u_VGR6_BmbZ", + "5I3xBq80E3N_", + "L2JmA1xAEntQ" + 
], + "name": "movenet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/movinet.ipynb b/site/en/hub/tutorials/movinet.ipynb new file mode 100644 index 00000000000..24600256cf9 --- /dev/null +++ b/site/en/hub/tutorials/movinet.ipynb @@ -0,0 +1,1047 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "toCy3v03Dwx7" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QKe-ubNcDvgv" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# MoViNet for streaming action recognition " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-vxk2Kbc_KSP" + }, + "source": [ + "This tutorial demonstrates how to use a pretrained video classification model to classify an activity (such as dancing, swimming, biking etc) in the given video. \n", + "\n", + "The model architecture used in this tutorial is called [MoViNet](https://arxiv.org/pdf/2103.11511.pdf) (Mobile Video Networks). MoVieNets are a family of efficient video classification models trained on huge dataset ([Kinetics 600](https://deepmind.com/research/open-source/kinetics)).\n", + "\n", + "In contrast to the [i3d models](https://tfhub.dev/s?q=i3d-kinetics) available on TF Hub, MoViNets also support frame-by-frame inference on streaming video. \n", + "\n", + "The pretrained models are available from [TF Hub](https://tfhub.dev/google/collections/movinet/1). The TF Hub collection also includes quantized models optimized for [TFLite](https://tensorflow.org/lite).\n", + "\n", + "The source for these models is available in the [TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/projects/movinet). This includes a [longer version of this tutorial](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/projects/movinet/movinet_tutorial.ipynb) that also covers building and fine-tuning a MoViNet model. \n", + "\n", + "This MoViNet tutorial is part of a series of TensorFlow video tutorials. Here are the other three tutorials:\n", + "\n", + "- [Load video data](https://www.tensorflow.org/tutorials/load_data/video): This tutorial explains how to load and preprocess video data into a TensorFlow dataset pipeline from scratch.\n", + "- [Build a 3D CNN model for video classification](https://www.tensorflow.org/tutorials/video/video_classification). Note that this tutorial uses a (2+1)D CNN that decomposes the spatial and temporal aspects of 3D data; if you are using volumetric data such as an MRI scan, consider using a 3D CNN instead of a (2+1)D CNN.\n", + "- [Transfer learning for video classification with MoViNet](https://www.tensorflow.org/tutorials/video/transfer_learning_with_movinet): This tutorial explains how to use a pre-trained video classification model trained on a different dataset with the UCF-101 dataset.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3E96e1UKQ8uR" + }, + "source": [ + "![jumping jacks plot](https://storage.googleapis.com/tf_model_garden/vision/movinet/artifacts/jumpingjacks_plot.gif)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8_oLnvJy7kz5" + }, + "source": [ + "## Setup\n", + "\n", + "For inference on smaller models (A0-A2), CPU is sufficient for this Colab." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GUgUMGmY1yq-" + }, + "outputs": [], + "source": [ + "!sudo apt install -y ffmpeg\n", + "!pip install -q mediapy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s3khsunT7kWa" + }, + "outputs": [], + "source": [ + "!pip uninstall -q -y opencv-python-headless\n", + "!pip install -q \"opencv-python-headless<4.3\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dI_1csl6Q-gH" + }, + "outputs": [], + "source": [ + "# Import libraries\n", + "import pathlib\n", + "\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import mediapy as media\n", + "import numpy as np\n", + "import PIL\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tqdm\n", + "\n", + "mpl.rcParams.update({\n", + " 'font.size': 10,\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pn8K9oWbmREi" + }, + "source": [ + "Get the kinetics 600 label list, and print the first few labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2VJUAcjhkfb3" + }, + "outputs": [], + "source": [ + "labels_path = tf.keras.utils.get_file(\n", + " fname='labels.txt',\n", + " origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'\n", + ")\n", + "labels_path = pathlib.Path(labels_path)\n", + "\n", + "lines = labels_path.read_text().splitlines()\n", + "KINETICS_600_LABELS = np.array([line.strip() for line in lines])\n", + "KINETICS_600_LABELS[:20]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G9BU5XsOmaq3" + }, + "source": [ + "To provide a simple example video for classification, we can load a short gif of jumping jacks being performed.\n", + "\n", + "![jumping jacks](https://github.com/tensorflow/models/raw/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/jumpingjack.gif)\n", + "\n", + "Attribution: Footage shared by [Coach Bobby Bluford](https://www.youtube.com/watch?v=-AxHpj-EuPg) on YouTube under the CC-BY license." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8aFKMbr4mfSg" + }, + "source": [ + "Download the gif." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w62jqXhaSb15" + }, + "outputs": [], + "source": [ + "jumpingjack_url = 'https://github.com/tensorflow/models/raw/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/jumpingjack.gif'\n", + "jumpingjack_path = tf.keras.utils.get_file(\n", + " fname='jumpingjack.gif',\n", + " origin=jumpingjack_url,\n", + " cache_dir='.', cache_subdir='.',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hdRS_22PebfB" + }, + "source": [ + "Define a function to read a gif file into a `tf.Tensor`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mPhmCu6oSi5f" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Read and process a video\n", + "def load_gif(file_path, image_size=(224, 224)):\n", + " \"\"\"Loads a gif file into a TF tensor.\n", + "\n", + " Use images resized to match what's expected by your model.\n", + " The model pages say the \"A2\" models expect 224 x 224 images at 5 fps\n", + "\n", + " Args:\n", + " file_path: path to the location of a gif file.\n", + " image_size: a tuple of target size.\n", + "\n", + " Returns:\n", + " a video of the gif file\n", + " \"\"\"\n", + " # Load a gif file, convert it to a TF tensor\n", + " raw = tf.io.read_file(file_path)\n", + " video = tf.io.decode_gif(raw)\n", + " # Resize the video\n", + " video = tf.image.resize(video, image_size)\n", + " # change dtype to a float32\n", + " # Hub models always want images normalized to [0,1]\n", + " # ref: https://www.tensorflow.org/hub/common_signatures/images#input\n", + " video = tf.cast(video, tf.float32) / 255.\n", + " return video" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xx7cZm8vpDJm" + }, + "source": [ + "The video's shape is `(frames, height, width, colors)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E7k_PmbFSkHv" + }, + "outputs": [], + "source": [ + "jumpingjack=load_gif(jumpingjack_path)\n", + "jumpingjack.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LcKFy3oedBvF" + }, + "source": [ + "## How to use the model\n", + "\n", + "This section contains a walkthrough showing how to use the [models from TensorFlow Hub](https://tfhub.dev/google/collections/movinet/1). If you just want to see the models in action, skip to the next section.\n", + "\n", + "There are two versions of each model: `base` and `streaming`.\n", + "\n", + "* The `base` version takes a video as input, and returns the probabilities averaged over the frames.\n", + "* The `streaming` version takes a video frame and an RNN state as input, and returns the predictions for that frame, and the new RNN state. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WQO6Zb8Hm-9q" + }, + "source": [ + "### The base model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfnYU20JnPqp" + }, + "source": [ + "Download the [pretrained model from TensorFlow Hub](https://tfhub.dev/tensorflow/movinet/a2/base/kinetics-600/classification/3). 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FnpPo6HSR7qv" + }, + "outputs": [], + "source": [ + "%%time\n", + "id = 'a2'\n", + "mode = 'base'\n", + "version = '3'\n", + "hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'\n", + "model = hub.load(hub_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jvaFwKhxndmb" + }, + "source": [ + "This version of the model has one `signature`. It takes an `image` argument which is a `tf.float32` with shape `(batch, frames, height, width, colors)`. It returns a dictionary containing one output: A `tf.float32` tensor of logits with shape `(batch, classes)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7GzZ4Y03T_gH" + }, + "outputs": [], + "source": [ + "sig = model.signatures['serving_default']\n", + "print(sig.pretty_printed_signature())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M4Xny1ANomi4" + }, + "source": [ + "To run this signature on the video you need to add the outer `batch` dimension to the video first." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LBOFEDG1XvZE" + }, + "outputs": [], + "source": [ + "#warmup\n", + "sig(image = jumpingjack[tf.newaxis, :1]);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCeW3KycVbGn" + }, + "outputs": [], + "source": [ + "%%time\n", + "logits = sig(image = jumpingjack[tf.newaxis, ...])\n", + "logits = logits['classifier_head'][0]\n", + "\n", + "print(logits.shape)\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AE8doqkPpxED" + }, + "source": [ + "Define a `get_top_k` function that packages the above output processing for later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OozPNO6LvZ00" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Get top_k labels and probabilities\n", + "def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):\n", + " \"\"\"Outputs the top k model labels and probabilities on the given video.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " k: the number of top predictions to select.\n", + " label_map: a list of labels to map logit indices to label strings.\n", + "\n", + " Returns:\n", + " a tuple of the top-k labels and probabilities.\n", + " \"\"\"\n", + " # Sort predictions to find top_k\n", + " top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]\n", + " # collect the labels of top_k predictions\n", + " top_labels = tf.gather(label_map, top_predictions, axis=-1)\n", + " # decode lablels\n", + " top_labels = [label.decode('utf8') for label in top_labels.numpy()]\n", + " # top_k probabilities of the predictions\n", + " top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()\n", + " return tuple(zip(top_labels, top_probs))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kTfKMT29pP_Z" + }, + "source": [ + "Convert the `logits` to probabilities, and look up the top 5 classes for the video. The model confirms that the video is probably of `jumping jacks`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z-SrNGsGV5Mt" + }, + "outputs": [], + "source": [ + "probs = tf.nn.softmax(logits, axis=-1)\n", + "for label, p in get_top_k(probs):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ltdijoQpqjxZ" + }, + "source": [ + "### The streaming model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dqdUPQXq45b" + }, + "source": [ + "The previous section used a model that runs over a whole video. Often when processing a video you don't want a single prediction at the end, you want to update predictions frame by frame. The `stream` versions of the model allow you to do this.\n", + "\n", + "Load the `stream` version of the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mxt0hRXFZkAM" + }, + "outputs": [], + "source": [ + "%%time\n", + "id = 'a2'\n", + "mode = 'stream'\n", + "version = '3'\n", + "hub_url = f'https://tfhub.dev/tensorflow/movinet/{id}/{mode}/kinetics-600/classification/{version}'\n", + "model = hub.load(hub_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pDswtsGgsYGS" + }, + "source": [ + "Using this model is slightly more complex than the `base` model. You have to keep track of the internal state of the model's RNNs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fM_Vb1VsbDm" + }, + "outputs": [], + "source": [ + "list(model.signatures.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ojr1_iYCtPvp" + }, + "source": [ + "The `init_states` signature takes the video's **shape** `(batch, frames, height, width, colors)` as input, and returns a large dictionary of tensors containing the initial RNN states: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "67loYFGpo_RP" + }, + "outputs": [], + "source": [ + "lines = model.signatures['init_states'].pretty_printed_signature().splitlines()\n", + "lines = lines[:10]\n", + "lines.append(' ...')\n", + "print('.\\n'.join(lines))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v5lG3vejn5df" + }, + "outputs": [], + "source": [ + "initial_state = model.init_states(jumpingjack[tf.newaxis, ...].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J3DwmyHnuhH_" + }, + "outputs": [], + "source": [ + "type(initial_state)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K8SyiEU6tB-e" + }, + "outputs": [], + "source": [ + "list(sorted(initial_state.keys()))[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xeMCzJMBvwRF" + }, + "source": [ + "Once you have the initial state for the RNNs, you can pass the state and a video frame as input (keeping the `(batch, frames, height, width, colors)` shape for the video frame). The model returns a `(logits, state)` pair. \n", + "\n", + "After just seeing the first frame, the model is not convinced that the video is of \"jumping jacks\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "McSLdIgtsI3d" + }, + "outputs": [], + "source": [ + "inputs = initial_state.copy()\n", + "\n", + "# Add the batch axis, take the first frame, but keep the frame-axis.\n", + "inputs['image'] = jumpingjack[tf.newaxis, 0:1, ...] 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WlH7PqLPX664" + }, + "outputs": [], + "source": [ + "# warmup\n", + "model(inputs);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7uzNXtu7X5sr" + }, + "outputs": [], + "source": [ + "logits, new_state = model(inputs)\n", + "logits = logits[0]\n", + "probs = tf.nn.softmax(logits, axis=-1)\n", + "\n", + "for label, p in get_top_k(probs):\n", + " print(f'{label:20s}: {p:.3f}')\n", + "\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLU644FQwXSb" + }, + "source": [ + "If you run the model in a loop, passing the updated state with each frame, the model quickly converges to the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Fzm7T4ImmIEg" + }, + "outputs": [], + "source": [ + "%%time\n", + "state = initial_state.copy()\n", + "all_logits = []\n", + "\n", + "for n in range(len(jumpingjack)):\n", + " inputs = state\n", + " inputs['image'] = jumpingjack[tf.newaxis, n:n+1, ...]\n", + " result, state = model(inputs)\n", + " # Collect this frame's logits (drop the batch axis).\n", + " all_logits.append(result[0])\n", + "\n", + "probabilities = tf.nn.softmax(all_logits, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B7UtHoSWcOT2" + }, + "outputs": [], + "source": [ + "for label, p in get_top_k(probabilities[-1]):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6ffV3NhZcsrv" + }, + "outputs": [], + "source": [ + "id = tf.argmax(probabilities[-1])\n", + "plt.plot(probabilities[:, id])\n", + "plt.xlabel('Frame #')\n", + "plt.ylabel(f\"p('{KINETICS_600_LABELS[id]}')\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d7MZ_AfRW845" + }, + "source": [ + "You may notice that the final probability is much more certain than in the previous section where you ran the `base` model. The `base` model returns an average of the predictions over the frames." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0Wij4tsyW8dR" + }, + "outputs": [], + "source": [ + "for label, p in get_top_k(tf.reduce_mean(probabilities, axis=0)):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qLUoC9ejggGo" + }, + "source": [ + "## Animate the predictions over time\n", + "\n", + "The previous section went into some details about how to use these models. This section builds on top of that to produce some nice inference animations. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OnFqOXazoWgy" + }, + "source": [ + "The hidden cell below defines helper functions used in this section."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "dx55NK3ZoZeh" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Get top_k labels and probabilities predicted using MoViNets streaming model\n", + "def get_top_k_streaming_labels(probs, k=5, label_map=KINETICS_600_LABELS):\n", + " \"\"\"Returns the top-k labels over an entire video sequence.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " k: the number of top predictions to select.\n", + " label_map: a list of labels to map logit indices to label strings.\n", + "\n", + " Returns:\n", + " a tuple of the top-k probabilities, labels, and logit indices\n", + " \"\"\"\n", + " top_categories_last = tf.argsort(probs, -1, 'DESCENDING')[-1, :1]\n", + " # Sort predictions to find top_k\n", + " categories = tf.argsort(probs, -1, 'DESCENDING')[:, :k]\n", + " categories = tf.reshape(categories, [-1])\n", + "\n", + " counts = sorted([\n", + " (i.numpy(), tf.reduce_sum(tf.cast(categories == i, tf.int32)).numpy())\n", + " for i in tf.unique(categories)[0]\n", + " ], key=lambda x: x[1], reverse=True)\n", + "\n", + " top_probs_idx = tf.constant([i for i, _ in counts[:k]])\n", + " top_probs_idx = tf.concat([top_categories_last, top_probs_idx], 0)\n", + " # find unique indices of categories\n", + " top_probs_idx = tf.unique(top_probs_idx)[0][:k+1]\n", + " # top_k probabilities of the predictions\n", + " top_probs = tf.gather(probs, top_probs_idx, axis=-1)\n", + " top_probs = tf.transpose(top_probs, perm=(1, 0))\n", + " # collect the labels of top_k predictions\n", + " top_labels = tf.gather(label_map, top_probs_idx, axis=0)\n", + " # decode the top_k labels\n", + " top_labels = [label.decode('utf8') for label in top_labels.numpy()]\n", + "\n", + " return top_probs, top_labels, top_probs_idx\n", + "\n", + "# Plot top_k predictions at a given time step\n", + "def plot_streaming_top_preds_at_step(\n", + " top_probs,\n", + " top_labels,\n", + " step=None,\n", + " image=None,\n", + " legend_loc='lower left',\n", + " duration_seconds=10,\n", + " figure_height=500,\n", + " playhead_scale=0.8,\n", + " grid_alpha=0.3):\n", + " \"\"\"Generates a plot of the top video model predictions at a given time step.\n", + "\n", + " Args:\n", + " top_probs: a tensor of shape (k, num_frames) representing the top-k\n", + " probabilities over all frames.\n", + " top_labels: a list of length k that represents the top-k label strings.\n", + " step: the current time step in the range [0, num_frames].\n", + " image: the image frame to display at the current time step.\n", + " legend_loc: the placement location of the legend.\n", + " duration_seconds: the total duration of the video.\n", + " figure_height: the output figure height.\n", + " playhead_scale: scale value for the playhead.\n", + " grid_alpha: alpha value for the gridlines.\n", + "\n", + " Returns:\n", + " A tuple of the output numpy image, figure, and axes.\n", + " \"\"\"\n", + " # find number of top_k labels and frames in the video\n", + " num_labels, num_frames = top_probs.shape\n", + " if step is None:\n", + " step = num_frames\n", + " # Visualize frames and top_k probabilities of streaming video\n", + " fig = plt.figure(figsize=(6.5, 7), dpi=300)\n", + " gs = mpl.gridspec.GridSpec(8, 1)\n", + " ax2 = plt.subplot(gs[:-3, :])\n", + " ax = plt.subplot(gs[-3:, :])\n", + " # display the frame\n", + " if image is not None:\n", + " 
ax2.imshow(image, interpolation='nearest')\n", + " ax2.axis('off')\n", + " # x-axis (frame number)\n", + " preview_line_x = tf.linspace(0., duration_seconds, num_frames)\n", + " # y-axis (top_k probabilities)\n", + " preview_line_y = top_probs\n", + "\n", + " line_x = preview_line_x[:step+1]\n", + " line_y = preview_line_y[:, :step+1]\n", + "\n", + " for i in range(num_labels):\n", + " ax.plot(preview_line_x, preview_line_y[i], label=None, linewidth='1.5',\n", + " linestyle=':', color='gray')\n", + " ax.plot(line_x, line_y[i], label=top_labels[i], linewidth='2.0')\n", + "\n", + "\n", + " ax.grid(which='major', linestyle=':', linewidth='1.0', alpha=grid_alpha)\n", + " ax.grid(which='minor', linestyle=':', linewidth='0.5', alpha=grid_alpha)\n", + "\n", + " min_height = tf.reduce_min(top_probs) * playhead_scale\n", + " max_height = tf.reduce_max(top_probs)\n", + " ax.vlines(preview_line_x[step], min_height, max_height, colors='red')\n", + " ax.scatter(preview_line_x[step], max_height, color='red')\n", + "\n", + " ax.legend(loc=legend_loc)\n", + "\n", + " plt.xlim(0, duration_seconds)\n", + " plt.ylabel('Probability')\n", + " plt.xlabel('Time (s)')\n", + " plt.yscale('log')\n", + "\n", + " fig.tight_layout()\n", + " fig.canvas.draw()\n", + "\n", + " data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)\n", + " data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))\n", + " plt.close()\n", + "\n", + " figure_width = int(figure_height * data.shape[1] / data.shape[0])\n", + " image = PIL.Image.fromarray(data).resize([figure_width, figure_height])\n", + " image = np.array(image)\n", + "\n", + " return image\n", + "\n", + "# Plotting top_k predictions from MoViNets streaming model\n", + "def plot_streaming_top_preds(\n", + " probs,\n", + " video,\n", + " top_k=5,\n", + " video_fps=25.,\n", + " figure_height=500,\n", + " use_progbar=True):\n", + " \"\"\"Generates a video plot of the top video model predictions.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " video: the video to display in the plot.\n", + " top_k: the number of top predictions to select.\n", + " video_fps: the input video fps.\n", + " figure_fps: the output video fps.\n", + " figure_height: the height of the output video.\n", + " use_progbar: display a progress bar.\n", + "\n", + " Returns:\n", + " A numpy array representing the output video.\n", + " \"\"\"\n", + " # select number of frames per second\n", + " video_fps = 8.\n", + " # select height of the image\n", + " figure_height = 500\n", + " # number of time steps of the given video\n", + " steps = video.shape[0]\n", + " # estimate duration of the video (in seconds)\n", + " duration = steps / video_fps\n", + " # estimate top_k probabilities and corresponding labels\n", + " top_probs, top_labels, _ = get_top_k_streaming_labels(probs, k=top_k)\n", + "\n", + " images = []\n", + " step_generator = tqdm.trange(steps) if use_progbar else range(steps)\n", + " for i in step_generator:\n", + " image = plot_streaming_top_preds_at_step(\n", + " top_probs=top_probs,\n", + " top_labels=top_labels,\n", + " step=i,\n", + " image=video[i],\n", + " duration_seconds=duration,\n", + " figure_height=figure_height,\n", + " )\n", + " images.append(image)\n", + "\n", + " return np.array(images)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eLgFBslcZOQO" + }, + "source": [ + "Start by running the streaming model across the frames of the video, and 
collecting the logits:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tXWR13wthnK5" + }, + "outputs": [], + "source": [ + "init_states = model.init_states(jumpingjack[tf.newaxis].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YqSkt7l8ltwt" + }, + "outputs": [], + "source": [ + "# Insert your video clip here\n", + "video = jumpingjack\n", + "images = tf.split(video[tf.newaxis], video.shape[0], axis=1)\n", + "\n", + "all_logits = []\n", + "\n", + "# To run on a video, pass in one frame at a time\n", + "states = init_states\n", + "for image in tqdm.tqdm(images):\n", + " # predictions for each frame\n", + " logits, states = model({**states, 'image': image})\n", + " all_logits.append(logits)\n", + "\n", + "# concatenating all the logits\n", + "logits = tf.concat(all_logits, 0)\n", + "# estimating probabilities\n", + "probs = tf.nn.softmax(logits, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OOGcCMMJyuPl" + }, + "outputs": [], + "source": [ + "final_probs = probs[-1]\n", + "print('Top_k predictions and their probabilities\\n')\n", + "for label, p in get_top_k(final_probs):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GaybT0rbZct-" + }, + "source": [ + "Convert the sequence of probabilities into a video:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xdox556CtMRb" + }, + "outputs": [], + "source": [ + "# Generate a plot and output to a video tensor\n", + "plot_video = plot_streaming_top_preds(probs, video, video_fps=8.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NSStKE9klCs3" + }, + "outputs": [], + "source": [ + "# For gif format, set codec='gif'\n", + "media.show_video(plot_video, fps=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LCImgZ3OdJw7" + }, + "source": [ + "## Resources\n", + "\n", + "The pretrained models are available from [TF Hub](https://tfhub.dev/google/collections/movinet/1). The TF Hub collection also includes quantized models optimized for [TFLite](https://tensorflow.org/lite).\n", + "\n", + "The source for these models is available in the [TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/projects/movinet). This includes a [longer version of this tutorial](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/projects/movinet/movinet_tutorial.ipynb) that also covers building and fine-tuning a MoViNet model. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gh5lLAo-HpVF" + }, + "source": [ + "## Next Steps\n", + "\n", + "To learn more about working with video data in TensorFlow, check out the following tutorials:\n", + "\n", + "* [Load video data](https://www.tensorflow.org/tutorials/load_data/video)\n", + "* [Build a 3D CNN model for video classification](https://www.tensorflow.org/tutorials/video/video_classification)\n", + "* [Transfer learning for video classification with MoViNet](https://www.tensorflow.org/tutorials/video/transfer_learning_with_movinet)" + ] + } + ], + "metadata": { + "colab": { + "name": "movinet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/object_detection.ipynb b/site/en/hub/tutorials/object_detection.ipynb new file mode 100644 index 00000000000..e1262f3084c --- /dev/null +++ b/site/en/hub/tutorials/object_detection.ipynb @@ -0,0 +1,442 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CxmDMK4yupqg" + }, + "source": [ + "# Object Detection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sy553YSVmYiK" + }, + "source": [ + "This Colab demonstrates use of a TF-Hub module trained to perform object detection." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v4XGxDrCkeip" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "6cPY9Ou4sWs_" + }, + "outputs": [], + "source": [ + "#@title Imports and function definitions\n", + "\n", + "# For running inference on the TF-Hub module.\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "# For downloading the image.\n", + "import matplotlib.pyplot as plt\n", + "import tempfile\n", + "from six.moves.urllib.request import urlopen\n", + "from six import BytesIO\n", + "\n", + "# For drawing onto the image.\n", + "import numpy as np\n", + "from PIL import Image\n", + "from PIL import ImageColor\n", + "from PIL import ImageDraw\n", + "from PIL import ImageFont\n", + "from PIL import ImageOps\n", + "\n", + "# For measuring the inference time.\n", + "import time\n", + "\n", + "# Print Tensorflow version\n", + "print(tf.__version__)\n", + "\n", + "# Check available GPU devices.\n", + "print(\"The following GPU devices are available: %s\" % tf.test.gpu_device_name())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZGkrXGy62409" + }, + "source": [ + "## Example use" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vlA3CftFpRiW" + }, + "source": [ + "### Helper functions for downloading images and for visualization.\n", + "\n", + "Visualization code adapted from [TF object detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py) for the simplest required functionality." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D9IwDpOtpIHW" + }, + "outputs": [], + "source": [ + "def display_image(image):\n", + " fig = plt.figure(figsize=(20, 15))\n", + " plt.grid(False)\n", + " plt.imshow(image)\n", + "\n", + "\n", + "def download_and_resize_image(url, new_width=256, new_height=256,\n", + " display=False):\n", + " _, filename = tempfile.mkstemp(suffix=\".jpg\")\n", + " response = urlopen(url)\n", + " image_data = response.read()\n", + " image_data = BytesIO(image_data)\n", + " pil_image = Image.open(image_data)\n", + " pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)\n", + " pil_image_rgb = pil_image.convert(\"RGB\")\n", + " pil_image_rgb.save(filename, format=\"JPEG\", quality=90)\n", + " print(\"Image downloaded to %s.\" % filename)\n", + " if display:\n", + " display_image(pil_image)\n", + " return filename\n", + "\n", + "\n", + "def draw_bounding_box_on_image(image,\n", + " ymin,\n", + " xmin,\n", + " ymax,\n", + " xmax,\n", + " color,\n", + " font,\n", + " thickness=4,\n", + " display_str_list=()):\n", + " \"\"\"Adds a bounding box to an image.\"\"\"\n", + " draw = ImageDraw.Draw(image)\n", + " im_width, im_height = image.size\n", + " (left, right, top, bottom) = (xmin * im_width, xmax * im_width,\n", + " ymin * im_height, ymax * im_height)\n", + " draw.line([(left, top), (left, bottom), (right, bottom), (right, top),\n", + " (left, top)],\n", + " width=thickness,\n", + " fill=color)\n", + "\n", + " # If the total height of the display strings added to the top of the bounding\n", + " # box exceeds the top of the image, stack the strings below the bounding box\n", + " # instead of above.\n", + " display_str_heights = [font.getbbox(ds)[3] for ds in display_str_list]\n", + " # Each display_str has a top and bottom margin of 0.05x.\n", + " total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)\n", + "\n", + " if top > total_display_str_height:\n", + " text_bottom = top\n", + " else:\n", + " text_bottom = top + total_display_str_height\n", + " # Reverse list and print from bottom to top.\n", + " for display_str in display_str_list[::-1]:\n", + " bbox = font.getbbox(display_str)\n", + " text_width, text_height = bbox[2], bbox[3]\n", + " margin = np.ceil(0.05 * text_height)\n", + " draw.rectangle([(left, text_bottom - text_height - 2 * margin),\n", + " (left + text_width, text_bottom)],\n", + " fill=color)\n", + " draw.text((left + margin, text_bottom - text_height - margin),\n", + " display_str,\n", + " fill=\"black\",\n", + " font=font)\n", + " text_bottom -= text_height - 2 * margin\n", + "\n", + "\n", + "def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):\n", + " \"\"\"Overlay labeled boxes on an image with formatted scores and label names.\"\"\"\n", + " colors = list(ImageColor.colormap.values())\n", + "\n", + " try:\n", + " font = ImageFont.truetype(\"/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf\",\n", + " 25)\n", + " except IOError:\n", + " print(\"Font not found, using default font.\")\n", + " font = ImageFont.load_default()\n", + "\n", + " for i in range(min(boxes.shape[0], max_boxes)):\n", + " if scores[i] >= min_score:\n", + " ymin, xmin, ymax, xmax = tuple(boxes[i])\n", + " display_str = \"{}: {}%\".format(class_names[i].decode(\"ascii\"),\n", + " int(100 * scores[i]))\n", + " color = colors[hash(class_names[i]) % len(colors)]\n", + " image_pil = Image.fromarray(np.uint8(image)).convert(\"RGB\")\n", + " 
draw_bounding_box_on_image(\n", + " image_pil,\n", + " ymin,\n", + " xmin,\n", + " ymax,\n", + " xmax,\n", + " color,\n", + " font,\n", + " display_str_list=[display_str])\n", + " np.copyto(image, np.array(image_pil))\n", + " return image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D19UCu9Q2-_8" + }, + "source": [ + "## Apply module\n", + "\n", + "Load a public image from Open Images v4, save locally, and display." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "YLWNhjUY1mhg" + }, + "outputs": [], + "source": [ + "# By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg\n", + "image_url = \"https://upload.wikimedia.org/wikipedia/commons/6/60/Naxos_Taverna.jpg\" #@param\n", + "downloaded_image_path = download_and_resize_image(image_url, 1280, 856, True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t-VdfLbC1w51" + }, + "source": [ + "Pick an object detection module and apply on the downloaded image. Modules:\n", + "* **FasterRCNN+InceptionResNet V2**: high accuracy,\n", + "* **ssd+mobilenet V2**: small and fast." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uazJ5ASc2_QE" + }, + "outputs": [], + "source": [ + "module_handle = \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\" #@param [\"https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1\", \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1\"]\n", + "\n", + "detector = hub.load(module_handle).signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "znW8Fq1EC0x7" + }, + "outputs": [], + "source": [ + "def load_img(path):\n", + " img = tf.io.read_file(path)\n", + " img = tf.image.decode_jpeg(img, channels=3)\n", + " return img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kwGJV96WWBLH" + }, + "outputs": [], + "source": [ + "def run_detector(detector, path):\n", + " img = load_img(path)\n", + "\n", + " converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]\n", + " start_time = time.time()\n", + " result = detector(converted_img)\n", + " end_time = time.time()\n", + "\n", + " result = {key:value.numpy() for key,value in result.items()}\n", + "\n", + " print(\"Found %d objects.\" % len(result[\"detection_scores\"]))\n", + " print(\"Inference time: \", end_time-start_time)\n", + "\n", + " image_with_boxes = draw_boxes(\n", + " img.numpy(), result[\"detection_boxes\"],\n", + " result[\"detection_class_entities\"], result[\"detection_scores\"])\n", + "\n", + " display_image(image_with_boxes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vchaUW1XDodD" + }, + "outputs": [], + "source": [ + "run_detector(detector, downloaded_image_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WUUY3nfRX7VF" + }, + "source": [ + "### More images\n", + "Perform inference on some additional images with time tracking.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rubdr2JXfsa1" + }, + "outputs": [], + "source": [ + "image_urls = [\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_Coleoptera_of_the_British_islands_(Plate_125)_(8592917784).jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/1/1b/The_Coleoptera_of_the_British_islands_%28Plate_125%29_%288592917784%29.jpg\",\n", + " # By Américo 
Toledano, Source: https://commons.wikimedia.org/wiki/File:Biblioteca_Maim%C3%B3nides,_Campus_Universitario_de_Rabanales_007.jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/1024px-Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg\",\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_smaller_British_birds_(8053836633).jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/0/09/The_smaller_British_birds_%288053836633%29.jpg\",\n", + " ]\n", + "\n", + "def detect_img(image_url):\n", + " start_time = time.time()\n", + " image_path = download_and_resize_image(image_url, 640, 480)\n", + " run_detector(detector, image_path)\n", + " end_time = time.time()\n", + " print(\"Inference time:\",end_time-start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "otPnrxMKIrj5" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H5F7DkD5NtOx" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DZ18R7dWNyoU" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[2])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "object_detection.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb b/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb new file mode 100644 index 00000000000..0166a7408d5 --- /dev/null +++ b/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb @@ -0,0 +1,361 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "VFMCdVJIIraw" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "ZxMYj8OpIrCp" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0fO2R2BBKx3l" + }, + "source": [ + "# Multilingual Universal Sentence Encoder Q&A Retrieval\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zsDm_WgMNlJQ" + }, + "source": [ + "This is a demo for using [Universal Encoder Multilingual Q&A model](https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3) for question-answer retrieval of text, illustrating the use of **question_encoder** and **response_encoder** of the model. We use sentences from [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) paragraphs as the demo dataset, each sentence and its context (the text surrounding the sentence) is encoded into high dimension embeddings with the **response_encoder**. These embeddings are stored in an index built using the [simpleneighbors](https://pypi.org/project/simpleneighbors/) library for question-answer retrieval.\n", + "\n", + "On retrieval a random question is selected from the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset and encoded into high dimension embedding with the **question_encoder** and query the simpleneighbors index returning a list of approximate nearest neighbors in semantic space." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U0eOW2LTWiLg" + }, + "source": [ + "### More models\n", + "You can find all currently hosted text embedding models [here](https://tfhub.dev/s?module-type=text-embedding) and all models that have been trained on SQuAD as well [here](https://tfhub.dev/s?dataset=squad)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "x00t_uJCEbeb" + }, + "outputs": [], + "source": [ + "%%capture\n", + "#@title Setup Environment\n", + "# Install the latest Tensorflow version.\n", + "!pip install -q \"tensorflow-text==2.11.*\"\n", + "!pip install -q simpleneighbors[annoy]\n", + "!pip install -q nltk\n", + "!pip install -q tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "DmeFAuVsyWxg" + }, + "outputs": [], + "source": [ + "#@title Setup common imports and functions\n", + "import json\n", + "import nltk\n", + "import os\n", + "import pprint\n", + "import random\n", + "import simpleneighbors\n", + "import urllib\n", + "from IPython.display import HTML, display\n", + "from tqdm.notebook import tqdm\n", + "\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_text import SentencepieceTokenizer\n", + "\n", + "nltk.download('punkt')\n", + "\n", + "\n", + "def download_squad(url):\n", + " return json.load(urllib.request.urlopen(url))\n", + "\n", + "def extract_sentences_from_squad_json(squad):\n", + " all_sentences = []\n", + " for data in squad['data']:\n", + " for paragraph in data['paragraphs']:\n", + " sentences = nltk.tokenize.sent_tokenize(paragraph['context'])\n", + " all_sentences.extend(zip(sentences, [paragraph['context']] * len(sentences)))\n", + " return list(set(all_sentences)) # remove duplicates\n", + "\n", + "def extract_questions_from_squad_json(squad):\n", + " questions = []\n", + " for data in squad['data']:\n", + " for paragraph in data['paragraphs']:\n", + " for qas in paragraph['qas']:\n", + " if qas['answers']:\n", + " questions.append((qas['question'], qas['answers'][0]['text']))\n", + " return list(set(questions))\n", + "\n", + "def output_with_highlight(text, highlight):\n", + " output = \"
<li>\"\n", + " i = text.find(highlight)\n", + " while True:\n", + " if i == -1:\n", + " output += text\n", + " break\n", + " output += text[0:i]\n", + " output += '<b>'+text[i:i+len(highlight)]+'</b>'\n", + " text = text[i+len(highlight):]\n", + " i = text.find(highlight)\n", + " return output + \"</li>\\n\"\n", + "\n", + "def display_nearest_neighbors(query_text, answer_text=None):\n", + " query_embedding = model.signatures['question_encoder'](tf.constant([query_text]))['outputs'][0]\n", + " search_results = index.nearest(query_embedding, n=num_results)\n", + "\n", + " if answer_text:\n", + " result_md = '''\n", + " <p>Random Question from SQuAD:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " <p>Answer:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " ''' % (query_text , answer_text)\n", + " else:\n", + " result_md = '''\n", + " <p>Question:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " ''' % query_text\n", + "\n", + " result_md += '''\n", + " <p>Retrieved sentences :\n", + " <ol>\n", + " '''\n", + "\n", + " if answer_text:\n", + " for s in search_results:\n", + " result_md += output_with_highlight(s, answer_text)\n", + " else:\n", + " for s in search_results:\n", + " result_md += '<li>' + s + '</li>\\n'\n", + "\n", + " result_md += \"</ol>\"\n",
    \"\n", + " display(HTML(result_md))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1kbkT8i3FL_C" + }, + "source": [ + "Run the following code block to download and extract the SQuAD dataset into:\n", + "\n", + "* **sentences** is a list of (text, context) tuples - each paragraph from the SQuAD dataset are split into sentences using nltk library and the sentence and paragraph text forms the (text, context) tuple.\n", + "* **questions** is a list of (question, answer) tuples.\n", + "\n", + "Note: You can use this demo to index the SQuAD train dataset or the smaller dev dataset (1.1 or 2.0) by selecting the **squad_url** below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "iYqV2GAty_Eh" + }, + "outputs": [], + "source": [ + "#@title Download and extract SQuAD data\n", + "squad_url = 'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json' #@param [\"https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json\", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json\", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json\"]\n", + "\n", + "squad_json = download_squad(squad_url)\n", + "sentences = extract_sentences_from_squad_json(squad_json)\n", + "questions = extract_questions_from_squad_json(squad_json)\n", + "print(\"%s sentences, %s questions extracted from SQuAD %s\" % (len(sentences), len(questions), squad_url))\n", + "\n", + "print(\"\\nExample sentence and context:\\n\")\n", + "sentence = random.choice(sentences)\n", + "print(\"sentence:\\n\")\n", + "pprint.pprint(sentence[0])\n", + "print(\"\\ncontext:\\n\")\n", + "pprint.pprint(sentence[1])\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9x3u-2uSGbDf" + }, + "source": [ + "The following code block setup the tensorflow graph **g** and **session** with the [Universal Encoder Multilingual Q&A model](https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3)'s **question_encoder** and **response_encoder** signatures." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "44I0uCRQRiFO" + }, + "outputs": [], + "source": [ + "#@title Load model from tensorflow hub\n", + "module_url = \"https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3\" #@param [\"https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3\", \"https://tfhub.dev/google/universal-sentence-encoder-qa/3\"]\n", + "model = hub.load(module_url)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCQpDmTZG0O6" + }, + "source": [ + "The following code block compute the embeddings for all the text, context tuples and store them in a [simpleneighbors](https://pypi.org/project/simpleneighbors/) index using the **response_encoder**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FwDUryIfSLp2" + }, + "outputs": [], + "source": [ + "#@title Compute embeddings and build simpleneighbors index\n", + "batch_size = 100\n", + "\n", + "encodings = model.signatures['response_encoder'](\n", + " input=tf.constant([sentences[0][0]]),\n", + " context=tf.constant([sentences[0][1]]))\n", + "index = simpleneighbors.SimpleNeighbors(\n", + " len(encodings['outputs'][0]), metric='angular')\n", + "\n", + "print('Computing embeddings for %s sentences' % len(sentences))\n", + "slices = zip(*(iter(sentences),) * batch_size)\n", + "num_batches = int(len(sentences) / batch_size)\n", + "for s in tqdm(slices, total=num_batches):\n", + " response_batch = list([r for r, c in s])\n", + " context_batch = list([c for r, c in s])\n", + " encodings = model.signatures['response_encoder'](\n", + " input=tf.constant(response_batch),\n", + " context=tf.constant(context_batch)\n", + " )\n", + " for batch_index, batch in enumerate(response_batch):\n", + " index.add_one(batch, encodings['outputs'][batch_index])\n", + "\n", + "index.build()\n", + "print('simpleneighbors index for %s sentences built.' % len(sentences))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZkNcjoPzHJpP" + }, + "source": [ + "On retrieval, the question is encoded using the **question_encoder** and the question embedding is used to query the simpleneighbors index." 
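To make that retrieval step concrete, here is a minimal sketch (not part of the original notebook) that assumes the `model`, `index`, and `num_results` objects defined in the surrounding cells; the question string below is made up:

```python
# Minimal sketch of Q&A retrieval, assuming `model`, `index`, and `num_results`
# from the cells above. The question text is illustrative only.
import tensorflow.compat.v2 as tf

question = "What is the capital of France?"  # hypothetical query
# Encode the question with the question_encoder signature.
query_embedding = model.signatures['question_encoder'](
    tf.constant([question]))['outputs'][0]
# Query the simpleneighbors index for the closest response sentences.
for sentence in index.nearest(query_embedding, n=num_results):
    print(sentence)
```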
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "J0xTw2w3UViK" + }, + "outputs": [], + "source": [ + "#@title Retrieve nearest neighbors for a random question from SQuAD\n", + "num_results = 25 #@param {type:\"slider\", min:5, max:40, step:1}\n", + "\n", + "query = random.choice(questions)\n", + "display_nearest_neighbors(query[0], query[1])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "VFMCdVJIIraw" + ], + "name": "retrieval_with_tf_hub_universal_encoder_qa.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb b/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb new file mode 100644 index 00000000000..bd73cffebdf --- /dev/null +++ b/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "BhN1AplL0Hpv" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LMgeG2swVVi6" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AqBuuwrIxlGs" + }, + "source": [ + "# Generating Images with Little Data Using S3GAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5AWAusyySDA" + }, + "source": [ + "This notebook is a demo of Generative Adversarial Networks trained on ImageNet with as little as 2.5% labeled data using self- and semi-supervised learning techniques. Both generator and discriminator models are available on [TF Hub](https://tfhub.dev/s?publisher=google&q=compare_gan).\n", + "\n", + "For more information about the models and the training procedure see our [blogpost](https://ai.googleblog.com/2019/03/reducing-need-for-labeled-data-in.html) and the [paper](https://arxiv.org/abs/1903.02271) [1].\n", + "The code for training these models is available on [GitHub](https://github.com/google/compare_gan).\n", + "\n", + "To get started, connect to a runtime and follow these steps:\n", + "\n", + "1. (Optional) Select a model in the second code cell below.\n", + "2. Click **Runtime > Run all** to run each cell in order.\n", + " * Afterwards, the interactive visualizations should update automatically when you modify the settings using the sliders and dropdown menus.\n", + "\n", + "Note: if you run into any issues, you can try restarting the runtime and rerunning all cells from scratch by clicking **Runtime > Restart and run all...**.\n", + "\n", + "[1] Mario Lucic\\*, Michael Tschannen\\*, Marvin Ritter\\*, Xiaohua Zhai, Olivier\n", + " Bachem, Sylvain Gelly, [High-Fidelity Image Generation With Fewer Labels](https://arxiv.org/abs/1903.02271), ICML 2019." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_m5jsOM9kXWP" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NhlMa_tHs0_W" + }, + "outputs": [], + "source": [ + "# @title Imports and utility functions\n", + "import os\n", + "\n", + "import IPython\n", + "from IPython.display import display\n", + "import numpy as np\n", + "import PIL.Image\n", + "import pandas as pd\n", + "import six\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "def imgrid(imarray, cols=8, pad=1):\n", + " pad = int(pad)\n", + " assert pad >= 0\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = int(np.ceil(N / float(cols)))\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant')\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " return grid[:-pad, :-pad]\n", + "\n", + "\n", + "def imshow(a, format='png', jpeg_fallback=True):\n", + " a = np.asarray(a, dtype=np.uint8)\n", + " if six.PY3:\n", + " str_file = six.BytesIO()\n", + " else:\n", + " str_file = six.StringIO()\n", + " PIL.Image.fromarray(a).save(str_file, format)\n", + " png_data = str_file.getvalue()\n", + " try:\n", + " disp = display(IPython.display.Image(png_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print ('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format)\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp\n", + "\n", + "\n", + "class Generator(object):\n", + "\n", + " def __init__(self, module_spec):\n", + " self._module_spec = module_spec\n", + " 
self._sess = None\n", + " self._graph = tf.Graph()\n", + " self._load_model()\n", + "\n", + " @property\n", + " def z_dim(self):\n", + " return self._z.shape[-1].value\n", + "\n", + " @property\n", + " def conditional(self):\n", + " return self._labels is not None\n", + "\n", + " def _load_model(self):\n", + " with self._graph.as_default():\n", + " self._generator = hub.Module(self._module_spec, name=\"gen_module\",\n", + " tags={\"gen\", \"bsNone\"})\n", + " input_info = self._generator.get_input_info_dict()\n", + " inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in self._generator.get_input_info_dict().items()}\n", + " self._samples = self._generator(inputs=inputs, as_dict=True)[\"generated\"]\n", + " print(\"Inputs:\", inputs)\n", + " print(\"Outputs:\", self._samples)\n", + " self._z = inputs[\"z\"]\n", + " self._labels = inputs.get(\"labels\", None)\n", + "\n", + " def _init_session(self):\n", + " if self._sess is None:\n", + " self._sess = tf.Session(graph=self._graph)\n", + " self._sess.run(tf.global_variables_initializer())\n", + "\n", + " def get_noise(self, num_samples, seed=None):\n", + " if np.isscalar(seed):\n", + " np.random.seed(seed)\n", + " return np.random.normal(size=[num_samples, self.z_dim])\n", + " z = np.empty(shape=(len(seed), self.z_dim), dtype=np.float32)\n", + " for i, s in enumerate(seed):\n", + " np.random.seed(s)\n", + " z[i] = np.random.normal(size=[self.z_dim])\n", + " return z\n", + "\n", + " def get_samples(self, z, labels=None):\n", + " with self._graph.as_default():\n", + " self._init_session()\n", + " feed_dict = {self._z: z}\n", + " if self.conditional:\n", + " assert labels is not None\n", + " assert labels.shape[0] == z.shape[0]\n", + " feed_dict[self._labels] = labels\n", + " samples = self._sess.run(self._samples, feed_dict=feed_dict)\n", + " return np.uint8(np.clip(256 * samples, 0, 255))\n", + "\n", + "\n", + "class Discriminator(object):\n", + "\n", + " def __init__(self, module_spec):\n", + " self._module_spec = module_spec\n", + " self._sess = None\n", + " self._graph = tf.Graph()\n", + " self._load_model()\n", + "\n", + " @property\n", + " def conditional(self):\n", + " return \"labels\" in self._inputs\n", + "\n", + " @property\n", + " def image_shape(self):\n", + " return self._inputs[\"images\"].shape.as_list()[1:]\n", + "\n", + " def _load_model(self):\n", + " with self._graph.as_default():\n", + " self._discriminator = hub.Module(self._module_spec, name=\"disc_module\",\n", + " tags={\"disc\", \"bsNone\"})\n", + " input_info = self._discriminator.get_input_info_dict()\n", + " self._inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in input_info.items()}\n", + " self._outputs = self._discriminator(inputs=self._inputs, as_dict=True)\n", + " print(\"Inputs:\", self._inputs)\n", + " print(\"Outputs:\", self._outputs)\n", + "\n", + " def _init_session(self):\n", + " if self._sess is None:\n", + " self._sess = tf.Session(graph=self._graph)\n", + " self._sess.run(tf.global_variables_initializer())\n", + "\n", + " def predict(self, images, labels=None):\n", + " with self._graph.as_default():\n", + " self._init_session()\n", + " feed_dict = {self._inputs[\"images\"]: images}\n", + " if \"labels\" in self._inputs:\n", + " assert labels is not None\n", + " assert labels.shape[0] == images.shape[0]\n", + " feed_dict[self._inputs[\"labels\"]] = labels\n", + " return self._sess.run(self._outputs, feed_dict=feed_dict)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"msTFS1UPkugr" + }, + "source": [ + "## Select a model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-hBEi9IFdoI-" + }, + "outputs": [], + "source": [ + "# @title Select a model { run: \"auto\" }\n", + "\n", + "model_name = \"S3GAN 128x128 20% labels (FID 6.9, IS 98.1)\" # @param [\"S3GAN 256x256 10% labels (FID 8.8, IS 130.7)\", \"S3GAN 128x128 2.5% labels (FID 12.6, IS 48.7)\", \"S3GAN 128x128 5% labels (FID 8.4, IS 74.0)\", \"S3GAN 128x128 10% labels (FID 7.6, IS 90.3)\", \"S3GAN 128x128 20% labels (FID 6.9, IS 98.1)\"]\n", + "models = {\n", + " \"S3GAN 256x256 10% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_10_256x256/1\",\n", + " \"S3GAN 128x128 2.5% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_2_5_128x128/1\",\n", + " \"S3GAN 128x128 5% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_5_128x128/1\",\n", + " \"S3GAN 128x128 10% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_10_128x128/1\",\n", + " \"S3GAN 128x128 20% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_20_128x128/1\",\n", + "}\n", + "\n", + "module_spec = models[model_name.split(\" (\")[0]]\n", + "print(\"Module spec:\", module_spec)\n", + "\n", + "tf.reset_default_graph()\n", + "print(\"Loading model...\")\n", + "sampler = Generator(module_spec)\n", + "print(\"Model loaded.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ePQuAme_kxLj" + }, + "source": [ + "## Sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "kGgTXtFYq_FV" + }, + "outputs": [], + "source": [ + "# @title Sampling { run: \"auto\" }\n", + "\n", + "num_rows = 2 # @param {type: \"slider\", min:1, max:16}\n", + "num_cols = 3 # @param {type: \"slider\", min:1, max:16}\n", + "noise_seed = 23 # @param {type:\"slider\", min:0, max:100, step:1}\n", + "label_str = \"980) volcano\" # @param [\"-1) Random\", \"0) tench, Tinca tinca\", \"1) goldfish, Carassius auratus\", \"2) great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias\", \"3) tiger shark, Galeocerdo cuvieri\", \"4) hammerhead, hammerhead shark\", \"5) electric ray, crampfish, numbfish, torpedo\", \"6) stingray\", \"7) cock\", \"8) hen\", \"9) ostrich, Struthio camelus\", \"10) brambling, Fringilla montifringilla\", \"11) goldfinch, Carduelis carduelis\", \"12) house finch, linnet, Carpodacus mexicanus\", \"13) junco, snowbird\", \"14) indigo bunting, indigo finch, indigo bird, Passerina cyanea\", \"15) robin, American robin, Turdus migratorius\", \"16) bulbul\", \"17) jay\", \"18) magpie\", \"19) chickadee\", \"20) water ouzel, dipper\", \"21) kite\", \"22) bald eagle, American eagle, Haliaeetus leucocephalus\", \"23) vulture\", \"24) great grey owl, great gray owl, Strix nebulosa\", \"25) European fire salamander, Salamandra salamandra\", \"980) volcano\"]\n", + "\n", + "num_samples = num_rows * num_cols\n", + "z = sampler.get_noise(num_samples, seed=noise_seed)\n", + "\n", + "label = int(label_str.split(')')[0])\n", + "if label == -1:\n", + " labels = np.random.randint(0, num_classes, size=(num_samples))\n", + "else:\n", + " labels = np.asarray([label] * num_samples)\n", + "\n", + "samples = sampler.get_samples(z, labels)\n", + "imshow(imgrid(samples, cols=num_cols))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "vCffdVZvTtxL" + }, + "outputs": [], + "source": [ + "# @title Interpolation { run: \"auto\" }\n", + "\n", + "num_samples = 1 # @param 
{type: \"slider\", min: 1, max: 6, step: 1}\n", + "num_interps = 6 # @param {type: \"slider\", min: 2, max: 10, step: 1}\n", + "noise_seed_A = 11 # @param {type: \"slider\", min: 0, max: 100, step: 1}\n", + "noise_seed_B = 0 # @param {type: \"slider\", min: 0, max: 100, step: 1}\n", + "label_str = \"1) goldfish, Carassius auratus\" # @param [\"0) tench, Tinca tinca\", \"1) goldfish, Carassius auratus\", \"2) great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias\", \"3) tiger shark, Galeocerdo cuvieri\", \"4) hammerhead, hammerhead shark\", \"5) electric ray, crampfish, numbfish, torpedo\", \"6) stingray\", \"7) cock\", \"8) hen\", \"9) ostrich, Struthio camelus\", \"10) brambling, Fringilla montifringilla\", \"11) goldfinch, Carduelis carduelis\", \"12) house finch, linnet, Carpodacus mexicanus\", \"13) junco, snowbird\", \"14) indigo bunting, indigo finch, indigo bird, Passerina cyanea\", \"15) robin, American robin, Turdus migratorius\", \"16) bulbul\", \"17) jay\", \"18) magpie\", \"19) chickadee\", \"20) water ouzel, dipper\", \"21) kite\", \"22) bald eagle, American eagle, Haliaeetus leucocephalus\", \"23) vulture\", \"24) great grey owl, great gray owl, Strix nebulosa\", \"25) European fire salamander, Salamandra salamandra\"]\n", + "\n", + "\n", + "def interpolate(A, B, num_interps):\n", + " alphas = np.linspace(0, 1, num_interps)\n", + " if A.shape != B.shape:\n", + " raise ValueError('A and B must have the same shape to interpolate.')\n", + " return np.array([((1-a)*A + a*B)/np.sqrt(a**2 + (1-a)**2) for a in alphas])\n", + "\n", + "\n", + "def interpolate_and_shape(A, B, num_interps):\n", + " interps = interpolate(A, B, num_interps)\n", + " return (interps.transpose(1, 0, *range(2, len(interps.shape)))\n", + " .reshape(num_samples * num_interps, -1))\n", + "\n", + "label = int(label_str.split(')')[0])\n", + "labels = np.asarray([label] * num_samples * num_interps)\n", + "\n", + "\n", + "z_A = sampler.get_noise(num_samples, seed=noise_seed_A)\n", + "z_B = sampler.get_noise(num_samples, seed=noise_seed_B)\n", + "z = interpolate_and_shape(z_A, z_B, num_interps)\n", + "\n", + "samples = sampler.get_samples(z, labels)\n", + "imshow(imgrid(samples, cols=num_interps))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "esW0Up95Ob6U" + }, + "source": [ + "## Discriminator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ButxPSq0OzgL" + }, + "outputs": [], + "source": [ + "disc = Discriminator(module_spec)\n", + "\n", + "batch_size = 4\n", + "num_classes = 1000\n", + "images = np.random.random(size=[batch_size] + disc.image_shape)\n", + "labels = np.random.randint(0, num_classes, size=(batch_size))\n", + "\n", + "disc.predict(images, labels=labels)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "BhN1AplL0Hpv" + ], + "name": "s3gan_generation_with_tf_hub.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb b/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb new file mode 100644 index 00000000000..55bcebcc447 --- /dev/null +++ b/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb @@ -0,0 +1,882 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ACbjNjyO4f_8" + }, + "source": [ + "##### Copyright 2019 The 
TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCM50vaM4jiK" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9qOVy-_vmuUP" + }, + "source": [ + "# Semantic Search with Approximate Nearest Neighbors and Text Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Hks9F5qq6m2" + }, + "source": [ + "This tutorial illustrates how to generate embeddings from a [TensorFlow Hub](https://tfhub.dev) (TF-Hub) module given input data, and build an approximate nearest neighbours (ANN) index using the extracted embeddings. The index can then be used for real-time similarity matching and retrieval. \n", + "\n", + "When dealing with a large corpus of data, it's not efficient to perform exact matching by scanning the whole repository to find the most similar items to a given query in real-time. Thus, we use an approximate similarity matching algorithm which allows us to trade off a little bit of accuracy in finding exact nearest neighbor matches for a significant boost in speed. \n", + "\n", + "In this tutorial, we show an example of real-time text search over a corpus of news headlines to find the headlines that are most similar to a query. Unlike keyword search, this captures the semantic similarity encoded in the text embedding.\n", + "\n", + "The steps of this tutorial are:\n", + "1. Download sample data.\n", + "2. Generate embeddings for the data using a TF-Hub module\n", + "3. Build an ANN index for the embeddings\n", + "4. Use the index for similarity matching\n", + "\n", + "We use [Apache Beam](https://beam.apache.org/documentation/programming-guide/) with [TensorFlow Transform](https://www.tensorflow.org/tfx/tutorials/transform/simple) (TF-Transform) to generate the embeddings from the TF-Hub module. We also use Spotify's [ANNOY](https://github.com/spotify/annoy) library to build the approximate nearest neighbours index. You can find benchmarking of ANN framework in this [Github repository](https://github.com/erikbern/ann-benchmarks).\n", + "\n", + "This tutorial uses TensorFlow 1.0 and works only with TF1 [Hub modules](https://www.tensorflow.org/hub/tf1_hub_module) from TF-Hub. See the updated [TF2 version of this tutorial](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q0jr0QK9qO5P" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "whMRj9qeqed4" + }, + "source": [ + "Install the required libraries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qmXkLPoaqS--" + }, + "outputs": [], + "source": [ + "!pip install -q apache_beam\n", + "!pip install -q 'scikit_learn~=0.23.0' # For gaussian_random_matrix.\n", + "!pip install -q annoy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A-vBZiCCqld0" + }, + "source": [ + "Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6NTYbdWcseuK" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import pathlib\n", + "import pickle\n", + "from collections import namedtuple\n", + "from datetime import datetime\n", + "\n", + "import numpy as np\n", + "import apache_beam as beam\n", + "import annoy\n", + "from sklearn.random_projection import gaussian_random_matrix\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_GF0GnLqGdPQ" + }, + "outputs": [], + "source": [ + "# TFT needs to be installed afterwards\n", + "!pip install -q tensorflow_transform==0.24\n", + "import tensorflow_transform as tft\n", + "import tensorflow_transform.beam as tft_beam" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tx0SZa6-7b-f" + }, + "outputs": [], + "source": [ + "print('TF version: {}'.format(tf.__version__))\n", + "print('TF-Hub version: {}'.format(hub.__version__))\n", + "print('TF-Transform version: {}'.format(tft.__version__))\n", + "print('Apache Beam version: {}'.format(beam.__version__))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P6Imq876rLWx" + }, + "source": [ + "## 1. Download Sample Data\n", + "\n", + "[A Million News Headlines](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SYBGZL#) dataset contains news headlines published over a period of 15 years sourced from the reputable Australian Broadcasting Corp. (ABC). This news dataset has a summarised historical record of noteworthy events in the globe from early-2003 to end-2017 with a more granular focus on Australia. \n", + "\n", + "**Format**: Tab-separated two-column data: 1) publication date and 2) headline text. 
We are only interested in the headline text.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OpF57n8e5C9D" + }, + "outputs": [], + "source": [ + "!wget 'https://dataverse.harvard.edu/api/access/datafile/3450625?format=tab&gbrecs=true' -O raw.tsv\n", + "!wc -l raw.tsv\n", + "!head raw.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Reeoc9z0zTxJ" + }, + "source": [ + "For simplicity, we only keep the headline text and remove the publication date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INPWa4upv_yJ" + }, + "outputs": [], + "source": [ + "!rm -r corpus\n", + "!mkdir corpus\n", + "\n", + "with open('corpus/text.txt', 'w') as out_file:\n", + " with open('raw.tsv', 'r') as in_file:\n", + " for line in in_file:\n", + " headline = line.split('\\t')[1].strip().strip('\"')\n", + " out_file.write(headline+\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5-oedX40z6o2" + }, + "outputs": [], + "source": [ + "!tail corpus/text.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ls0Zh7kYz3PM" + }, + "source": [ + "## Helper function to load a TF-Hub module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vSt_jmyKz3Xp" + }, + "outputs": [], + "source": [ + "def load_module(module_url):\n", + " embed_module = hub.Module(module_url)\n", + " placeholder = tf.placeholder(dtype=tf.string)\n", + " embed = embed_module(placeholder)\n", + " session = tf.Session()\n", + " session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", + " print('TF-Hub module is loaded.')\n", + "\n", + " def _embeddings_fn(sentences):\n", + " computed_embeddings = session.run(\n", + " embed, feed_dict={placeholder: sentences})\n", + " return computed_embeddings\n", + "\n", + " return _embeddings_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2AngMtH50jNb" + }, + "source": [ + "## 2. Generate Embeddings for the Data.\n", + "\n", + "In this tutorial, we use the [Universal Sentence Encoder](https://tfhub.dev/google/universal-sentence-encoder/2) to generate embeddings for the headline data. The sentence embeddings can then be easily used to compute sentence level meaning similarity. We run the embedding generation process using Apache Beam and TF-Transform." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_DvXnDB1pEX" + }, + "source": [ + "### Embedding extraction method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yL7OEY1E0A35" + }, + "outputs": [], + "source": [ + "encoder = None\n", + "\n", + "def embed_text(text, module_url, random_projection_matrix):\n", + " # Beam will run this function in different processes that need to\n", + " # import hub and load embed_fn (if not previously loaded)\n", + " global encoder\n", + " if not encoder:\n", + " encoder = hub.Module(module_url)\n", + " embedding = encoder(text)\n", + " if random_projection_matrix is not None:\n", + " # Perform random projection for the embedding\n", + " embedding = tf.matmul(\n", + " embedding, tf.cast(random_projection_matrix, embedding.dtype))\n", + " return embedding\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_don5gXy9D59" + }, + "source": [ + "### Make TFT preprocess_fn method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fwYlrzzK9ECE" + }, + "outputs": [], + "source": [ + "def make_preprocess_fn(module_url, random_projection_matrix=None):\n", + " '''Makes a tft preprocess_fn'''\n", + "\n", + " def _preprocess_fn(input_features):\n", + " '''tft preprocess_fn'''\n", + " text = input_features['text']\n", + " # Generate the embedding for the input text\n", + " embedding = embed_text(text, module_url, random_projection_matrix)\n", + " \n", + " output_features = {\n", + " 'text': text, \n", + " 'embedding': embedding\n", + " }\n", + " \n", + " return output_features\n", + " \n", + " return _preprocess_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SQ492LN7A-NZ" + }, + "source": [ + "### Create dataset metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d2D4332VA-2V" + }, + "outputs": [], + "source": [ + "def create_metadata():\n", + " '''Creates metadata for the raw data'''\n", + " from tensorflow_transform.tf_metadata import dataset_metadata\n", + " from tensorflow_transform.tf_metadata import schema_utils\n", + " feature_spec = {'text': tf.FixedLenFeature([], dtype=tf.string)}\n", + " schema = schema_utils.schema_from_feature_spec(feature_spec)\n", + " metadata = dataset_metadata.DatasetMetadata(schema)\n", + " return metadata" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5zlSLPzRBm6H" + }, + "source": [ + "### Beam pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCGUIB172m2G" + }, + "outputs": [], + "source": [ + "def run_hub2emb(args):\n", + " '''Runs the embedding generation pipeline'''\n", + "\n", + " options = beam.options.pipeline_options.PipelineOptions(**args)\n", + " args = namedtuple(\"options\", args.keys())(*args.values())\n", + "\n", + " raw_metadata = create_metadata()\n", + " converter = tft.coders.CsvCoder(\n", + " column_names=['text'], schema=raw_metadata.schema)\n", + "\n", + " with beam.Pipeline(args.runner, options=options) as pipeline:\n", + " with tft_beam.Context(args.temporary_dir):\n", + " # Read the sentences from the input file\n", + " sentences = ( \n", + " pipeline\n", + " | 'Read sentences from files' >> beam.io.ReadFromText(\n", + " file_pattern=args.data_dir)\n", + " | 'Convert to dictionary' >> beam.Map(converter.decode)\n", + " )\n", + "\n", + " sentences_dataset = (sentences, raw_metadata)\n", + " preprocess_fn = make_preprocess_fn(args.module_url, 
args.random_projection_matrix)\n", + " # Generate the embeddings for the sentence using the TF-Hub module\n", + " embeddings_dataset, _ = (\n", + " sentences_dataset\n", + " | 'Extract embeddings' >> tft_beam.AnalyzeAndTransformDataset(preprocess_fn)\n", + " )\n", + "\n", + " embeddings, transformed_metadata = embeddings_dataset\n", + " # Write the embeddings to TFRecords files\n", + " embeddings | 'Write embeddings to TFRecords' >> beam.io.tfrecordio.WriteToTFRecord(\n", + " file_path_prefix='{}/emb'.format(args.output_dir),\n", + " file_name_suffix='.tfrecords',\n", + " coder=tft.coders.ExampleProtoCoder(transformed_metadata.schema))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uHbq4t2gCDAG" + }, + "source": [ + "### Generating Random Projection Weight Matrix\n", + "\n", + "[Random projection](https://en.wikipedia.org/wiki/Random_projection) is a simple yet powerful technique used to reduce the dimensionality of a set of points which lie in Euclidean space. For a theoretical background, see the [Johnson-Lindenstrauss lemma](https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma).\n", + "\n", + "Reducing the dimensionality of the embeddings with random projection means less time needed to build and query the ANN index.\n", + "\n", + "In this tutorial we use [Gaussian Random Projection](https://en.wikipedia.org/wiki/Random_projection#Gaussian_random_projection) from the [Scikit-learn](https://scikit-learn.org/stable/modules/random_projection.html#gaussian-random-projection) library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T1aYPeOUCDIP" + }, + "outputs": [], + "source": [ + "def generate_random_projection_weights(original_dim, projected_dim):\n", + " random_projection_matrix = None\n", + " if projected_dim and original_dim > projected_dim:\n", + " random_projection_matrix = gaussian_random_matrix(\n", + " n_components=projected_dim, n_features=original_dim).T\n", + " print(\"A Gaussian random weight matrix was created with shape {}\".format(random_projection_matrix.shape))\n", + " print('Storing random projection matrix to disk...')\n", + " with open('random_projection_matrix', 'wb') as handle:\n", + " pickle.dump(random_projection_matrix, \n", + " handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " \n", + " return random_projection_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CHxZX2Z3Nk64" + }, + "source": [ + "### Set parameters\nIf you want to build an index using the original embedding space without random projection, set the `projected_dim` parameter to `None`. Note that this will slow down the indexing step for high-dimensional embeddings."
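To make the effect of `projected_dim` concrete, here is a toy sketch (not from the notebook) of what the Gaussian random projection above does to an embedding; all dimensions and data below are made up:

```python
# Toy sketch of Gaussian random projection; dimensions and data are made up.
import numpy as np

original_dim, projected_dim = 512, 64
rng = np.random.default_rng(0)
# Entries drawn from N(0, 1/projected_dim), matching scikit-learn's Gaussian projection.
projection = rng.normal(scale=1.0 / np.sqrt(projected_dim),
                        size=(original_dim, projected_dim))
embeddings = rng.normal(size=(3, original_dim))   # three fake embeddings
projected = embeddings.dot(projection)            # shape (3, 64)
print(projected.shape)
```

Pairwise distances between the projected vectors stay approximately proportional to the original ones, which is why the shorter vectors are still usable for nearest-neighbor search.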
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "feMVXFL0NlIM" + }, + "outputs": [], + "source": [ + "module_url = 'https://tfhub.dev/google/universal-sentence-encoder/2' #@param {type:\"string\"}\n", + "projected_dim = 64 #@param {type:\"number\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "On-MbzD922kb" + }, + "source": [ + "### Run pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y3I1Wv4i21yY" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "output_dir = pathlib.Path(tempfile.mkdtemp())\n", + "temporary_dir = pathlib.Path(tempfile.mkdtemp())\n", + "\n", + "g = tf.Graph()\n", + "with g.as_default():\n", + " original_dim = load_module(module_url)(['']).shape[1]\n", + " random_projection_matrix = None\n", + "\n", + " if projected_dim:\n", + " random_projection_matrix = generate_random_projection_weights(\n", + " original_dim, projected_dim)\n", + "\n", + "args = {\n", + " 'job_name': 'hub2emb-{}'.format(datetime.utcnow().strftime('%y%m%d-%H%M%S')),\n", + " 'runner': 'DirectRunner',\n", + " 'batch_size': 1024,\n", + " 'data_dir': 'corpus/*.txt',\n", + " 'output_dir': output_dir,\n", + " 'temporary_dir': temporary_dir,\n", + " 'module_url': module_url,\n", + " 'random_projection_matrix': random_projection_matrix,\n", + "}\n", + "\n", + "print(\"Pipeline args are set.\")\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iS9obmeP4ZOA" + }, + "outputs": [], + "source": [ + "!rm -r {output_dir}\n", + "!rm -r {temporary_dir}\n", + "\n", + "print(\"Running pipeline...\")\n", + "%time run_hub2emb(args)\n", + "print(\"Pipeline is done.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JAwOo7gQWvVd" + }, + "outputs": [], + "source": [ + "!ls {output_dir}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVnee4e6U90u" + }, + "source": [ + "Read some of the generated embeddings..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-K7pGXlXOj1N" + }, + "outputs": [], + "source": [ + "import itertools\n", + "\n", + "embed_file = os.path.join(output_dir, 'emb-00000-of-00001.tfrecords')\n", + "sample = 5\n", + "record_iterator = tf.io.tf_record_iterator(path=embed_file)\n", + "for string_record in itertools.islice(record_iterator, sample):\n", + " example = tf.train.Example()\n", + " example.ParseFromString(string_record)\n", + " text = example.features.feature['text'].bytes_list.value\n", + " embedding = np.array(example.features.feature['embedding'].float_list.value)\n", + " print(\"Embedding dimensions: {}\".format(embedding.shape[0]))\n", + " print(\"{}: {}\".format(text, embedding[:10]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agGoaMSgY8wN" + }, + "source": [ + "## 3. Build the ANN Index for the Embeddings\n", + "\n", + "[ANNOY](https://github.com/spotify/annoy) (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mmapped into memory. It is built and used by [Spotify](https://www.spotify.com) for music recommendations." 
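Before the `build_index` helper in the next cell, a self-contained toy sketch of the ANNOY calls this tutorial relies on may help; the vector size and data here are made up:

```python
# Toy sketch of the ANNOY API used below; vectors are random and purely illustrative.
import annoy
import numpy as np

dim = 64
toy_index = annoy.AnnoyIndex(dim, metric='angular')
for i in range(1000):
    toy_index.add_item(i, np.random.normal(size=dim))  # add vectors one by one
toy_index.build(n_trees=10)  # more trees give better recall at the cost of a larger index
print(toy_index.get_nns_by_vector(np.random.normal(size=dim), 5))  # 5 approximate neighbors
```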
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UcPDspU3WjgH" + }, + "outputs": [], + "source": [ + "def build_index(embedding_files_pattern, index_filename, vector_length, \n", + " metric='angular', num_trees=100):\n", + " '''Builds an ANNOY index'''\n", + "\n", + " annoy_index = annoy.AnnoyIndex(vector_length, metric=metric)\n", + " # Mapping between the item and its identifier in the index\n", + " mapping = {}\n", + "\n", + " embed_files = tf.gfile.Glob(embedding_files_pattern)\n", + " print('Found {} embedding file(s).'.format(len(embed_files)))\n", + "\n", + " item_counter = 0\n", + " for f, embed_file in enumerate(embed_files):\n", + " print('Loading embeddings in file {} of {}...'.format(\n", + " f+1, len(embed_files)))\n", + " record_iterator = tf.io.tf_record_iterator(\n", + " path=embed_file)\n", + "\n", + " for string_record in record_iterator:\n", + " example = tf.train.Example()\n", + " example.ParseFromString(string_record)\n", + " text = example.features.feature['text'].bytes_list.value[0].decode(\"utf-8\")\n", + " mapping[item_counter] = text\n", + " embedding = np.array(\n", + " example.features.feature['embedding'].float_list.value)\n", + " annoy_index.add_item(item_counter, embedding)\n", + " item_counter += 1\n", + " if item_counter % 100000 == 0:\n", + " print('{} items loaded to the index'.format(item_counter))\n", + "\n", + " print('A total of {} items added to the index'.format(item_counter))\n", + "\n", + " print('Building the index with {} trees...'.format(num_trees))\n", + " annoy_index.build(n_trees=num_trees)\n", + " print('Index is successfully built.')\n", + " \n", + " print('Saving index to disk...')\n", + " annoy_index.save(index_filename)\n", + " print('Index is saved to disk.')\n", + " print(\"Index file size: {} GB\".format(\n", + " round(os.path.getsize(index_filename) / float(1024 ** 3), 2)))\n", + " annoy_index.unload()\n", + "\n", + " print('Saving mapping to disk...')\n", + " with open(index_filename + '.mapping', 'wb') as handle:\n", + " pickle.dump(mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " print('Mapping is saved to disk.')\n", + " print(\"Mapping file size: {} MB\".format(\n", + " round(os.path.getsize(index_filename + '.mapping') / float(1024 ** 2), 2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AgyOQhUq6FNE" + }, + "outputs": [], + "source": [ + "embedding_files = \"{}/emb-*.tfrecords\".format(output_dir)\n", + "embedding_dimension = projected_dim\n", + "index_filename = \"index\"\n", + "\n", + "!rm {index_filename}\n", + "!rm {index_filename}.mapping\n", + "\n", + "%time build_index(embedding_files, index_filename, embedding_dimension)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ic31Tm5cgAd5" + }, + "outputs": [], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "maGxDl8ufP-p" + }, + "source": [ + "## 4. Use the Index for Similarity Matching\n", + "Now we can use the ANN index to find news headlines that are semantically close to an input query." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dIs8W78fYPp" + }, + "source": [ + "### Load the index and the mapping files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jlTTrbQHayvb" + }, + "outputs": [], + "source": [ + "index = annoy.AnnoyIndex(embedding_dimension)\n", + "index.load(index_filename, prefault=True)\n", + "print('Annoy index is loaded.')\n", + "with open(index_filename + '.mapping', 'rb') as handle:\n", + " mapping = pickle.load(handle)\n", + "print('Mapping file is loaded.')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y6liFMSUh08J" + }, + "source": [ + "### Similarity matching method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mUxjTag8hc16" + }, + "outputs": [], + "source": [ + "def find_similar_items(embedding, num_matches=5):\n", + " '''Finds similar items to a given embedding in the ANN index'''\n", + " ids = index.get_nns_by_vector(\n", + " embedding, num_matches, search_k=-1, include_distances=False)\n", + " items = [mapping[i] for i in ids]\n", + " return items" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hjerNpmZja0A" + }, + "source": [ + "### Extract embedding from a given query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a0IIXzfBjZ19" + }, + "outputs": [], + "source": [ + "# Load the TF-Hub module\n", + "print(\"Loading the TF-Hub module...\")\n", + "g = tf.Graph()\n", + "with g.as_default():\n", + " embed_fn = load_module(module_url)\n", + "print(\"TF-Hub module is loaded.\")\n", + "\n", + "random_projection_matrix = None\n", + "if os.path.exists('random_projection_matrix'):\n", + " print(\"Loading random projection matrix...\")\n", + " with open('random_projection_matrix', 'rb') as handle:\n", + " random_projection_matrix = pickle.load(handle)\n", + " print('random projection matrix is loaded.')\n", + "\n", + "def extract_embeddings(query):\n", + " '''Generates the embedding for the query'''\n", + " query_embedding = embed_fn([query])[0]\n", + " if random_projection_matrix is not None:\n", + " query_embedding = query_embedding.dot(random_projection_matrix)\n", + " return query_embedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kCoCNROujEIO" + }, + "outputs": [], + "source": [ + "extract_embeddings(\"Hello Machine Learning!\")[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nE_Q60nCk_ZB" + }, + "source": [ + "### Enter a query to find the most similar items" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "wC0uLjvfk5nB" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "query = \"confronting global challenges\" #@param {type:\"string\"}\n", + "print(\"Generating embedding for the query...\")\n", + "%time query_embedding = extract_embeddings(query)\n", + "\n", + "print(\"\")\n", + "print(\"Finding relevant items in the index...\")\n", + "%time items = find_similar_items(query_embedding, 10)\n", + "\n", + "print(\"\")\n", + "print(\"Results:\")\n", + "print(\"=========\")\n", + "for item in items:\n", + " print(item)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wwtMtyOeDKwt" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](https://www.tensorflow.org/) and see the TF-Hub API documentation at 
[tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub modules at [tfhub.dev](https://tfhub.dev/) including more text embedding modules and image feature vector modules.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ls0Zh7kYz3PM", + "_don5gXy9D59", + "SQ492LN7A-NZ" + ], + "name": "semantic_approximate_nearest_neighbors.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb new file mode 100644 index 00000000000..0c2874bc030 --- /dev/null +++ b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "co7MV6sX7Xto" + }, + "source": [ + "# Universal Sentence Encoder\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eAVQGidpL8v5" + }, + "source": [ + "This notebook illustrates how to access the Universal Sentence Encoder and use it for sentence similarity and sentence classification tasks.\n", + "\n", + "The Universal Sentence Encoder makes getting sentence level embeddings as easy as it has historically been to lookup the embeddings for individual words. The sentence embeddings can then be trivially used to compute sentence level meaning similarity as well as to enable better performance on downstream classification tasks using less supervised training data.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pOTzp8O36CyQ" + }, + "source": [ + "## Setup\n", + "\n", + "This section sets up the environment for access to the Universal Sentence Encoder on TF Hub and provides examples of applying the encoder to words, sentences, and paragraphs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lVjNK8shFKOC" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip3 install seaborn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "63Pd3nJnTl-i" + }, + "source": [ + "More detailed information about installing Tensorflow can be found at [https://www.tensorflow.org/install/](https://www.tensorflow.org/install/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "zwty8Z6mAkdV" + }, + "outputs": [], + "source": [ + "#@title Load the Universal Sentence Encoder's TF Hub module\n", + "from absl import logging\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import re\n", + "import seaborn as sns\n", + "\n", + "module_url = \"https://tfhub.dev/google/universal-sentence-encoder/4\" #@param [\"https://tfhub.dev/google/universal-sentence-encoder/4\", \"https://tfhub.dev/google/universal-sentence-encoder-large/5\"]\n", + "model = hub.load(module_url)\n", + "print (\"module %s loaded\" % module_url)\n", + "def embed(input):\n", + " return model(input)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q8F4LNGFqOiq" + }, + "outputs": [], + "source": [ + "#@title Compute a representation for each message, showing various lengths supported.\n", + "word = \"Elephant\"\n", + "sentence = \"I am a sentence for which I would like to get its embedding.\"\n", + "paragraph = (\n", + " \"Universal Sentence Encoder embeddings also support short paragraphs. \"\n", + " \"There is no hard limit on how long the paragraph is. 
Roughly, the longer \"\n", + " \"the more 'diluted' the embedding will be.\")\n", + "messages = [word, sentence, paragraph]\n", + "\n", + "# Reduce logging output.\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "message_embeddings = embed(messages)\n", + "\n", + "for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):\n", + " print(\"Message: {}\".format(messages[i]))\n", + " print(\"Embedding size: {}\".format(len(message_embedding)))\n", + " message_embedding_snippet = \", \".join(\n", + " (str(x) for x in message_embedding[:3]))\n", + " print(\"Embedding: [{}, ...]\\n\".format(message_embedding_snippet))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BnvjATdy64eR" + }, + "source": [ + "# Semantic Textual Similarity Task Example\n", + "\n", + "The embeddings produced by the Universal Sentence Encoder are approximately normalized. The semantic similarity of two sentences can be trivially computed as the inner product of the encodings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h1FFCTKm7ba4" + }, + "outputs": [], + "source": [ + "def plot_similarity(labels, features, rotation):\n", + " corr = np.inner(features, features)\n", + " sns.set(font_scale=1.2)\n", + " g = sns.heatmap(\n", + " corr,\n", + " xticklabels=labels,\n", + " yticklabels=labels,\n", + " vmin=0,\n", + " vmax=1,\n", + " cmap=\"YlOrRd\")\n", + " g.set_xticklabels(labels, rotation=rotation)\n", + " g.set_title(\"Semantic Textual Similarity\")\n", + "\n", + "def run_and_plot(messages_):\n", + " message_embeddings_ = embed(messages_)\n", + " plot_similarity(messages_, message_embeddings_, 90)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "339tuJ5Pwqqv" + }, + "source": [ + "## Similarity Visualized\n", + "Here we show the similarity in a heat map. The final graph is a 9x9 matrix where each entry `[i, j]` is colored based on the inner product of the encodings for sentence `i` and `j`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cPMCaxrZwp7t" + }, + "outputs": [], + "source": [ + "messages = [\n", + " # Smartphones\n", + " \"I like my phone\",\n", + " \"My phone is not good.\",\n", + " \"Your cellphone looks great.\",\n", + "\n", + " # Weather\n", + " \"Will it snow tomorrow?\",\n", + " \"Recently a lot of hurricanes have hit the US\",\n", + " \"Global warming is real\",\n", + "\n", + " # Food and health\n", + " \"An apple a day, keeps the doctors away\",\n", + " \"Eating strawberries is healthy\",\n", + " \"Is paleo better than keto?\",\n", + "\n", + " # Asking about age\n", + " \"How old are you?\",\n", + " \"what is your age?\",\n", + "]\n", + "\n", + "run_and_plot(messages)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6FjdeCqPJeg-" + }, + "source": [ + "## Evaluation: STS (Semantic Textual Similarity) Benchmark\n", + "\n", + "The [**STS Benchmark**](https://ixa2.si.ehu.eus/stswiki/stswiki.html#STS_benchmark) provides an intrinsic evaluation of the degree to which similarity scores computed using sentence embeddings align with human judgements. The benchmark requires systems to return similarity scores for a diverse selection of sentence pairs. [Pearson correlation](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) is then used to evaluate the quality of the machine similarity scores against human judgements." 
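As a minimal sketch of the angular-similarity score that the benchmark cells below compute (assuming `u` and `v` are two already-computed, approximately normalized sentence embeddings; the names and toy values here are placeholders, not model output):

```python
import numpy as np

def angular_similarity(u, v):
    # Normalize, take the cosine similarity, clip for numerical safety,
    # then map the angle between the vectors to a score in [0, 1].
    u = u / np.linalg.norm(u)
    v = v / np.linalg.norm(v)
    cosine = np.clip(np.dot(u, v), -1.0, 1.0)
    return 1.0 - np.arccos(cosine) / np.pi

print(angular_similarity(np.array([1.0, 0.0]), np.array([1.0, 0.0])))  # identical -> 1.0
print(angular_similarity(np.array([1.0, 0.0]), np.array([0.0, 1.0])))  # orthogonal -> 0.5
```

Pearson correlation is then computed between these machine scores and the human ratings, exactly as in the evaluation cell further down.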
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q5nuBbI1iFQR" + }, + "source": [ + "### Download data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VOs8ZfOnJeBF" + }, + "outputs": [], + "source": [ + "import pandas\n", + "import scipy\n", + "import math\n", + "import csv\n", + "\n", + "sts_dataset = tf.keras.utils.get_file(\n", + " fname=\"Stsbenchmark.tar.gz\",\n", + " origin=\"http://ixa2.si.ehu.es/stswiki/images/4/48/Stsbenchmark.tar.gz\",\n", + " extract=True)\n", + "sts_dev = pandas.read_table(\n", + " os.path.join(os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-dev.csv\"),\n", + " skip_blank_lines=True,\n", + " usecols=[4, 5, 6],\n", + " names=[\"sim\", \"sent_1\", \"sent_2\"])\n", + "sts_test = pandas.read_table(\n", + " os.path.join(\n", + " os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-test.csv\"),\n", + " quoting=csv.QUOTE_NONE,\n", + " skip_blank_lines=True,\n", + " usecols=[4, 5, 6],\n", + " names=[\"sim\", \"sent_1\", \"sent_2\"])\n", + "# cleanup some NaN values in sts_dev\n", + "sts_dev = sts_dev[[isinstance(s, str) for s in sts_dev['sent_2']]]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8OKy8WhnKRe_" + }, + "source": [ + "### Evaluate Sentence Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W-q2r7jyZGb7" + }, + "outputs": [], + "source": [ + "sts_data = sts_dev #@param [\"sts_dev\", \"sts_test\"] {type:\"raw\"}\n", + "\n", + "def run_sts_benchmark(batch):\n", + " sts_encode1 = tf.nn.l2_normalize(embed(tf.constant(batch['sent_1'].tolist())), axis=1)\n", + " sts_encode2 = tf.nn.l2_normalize(embed(tf.constant(batch['sent_2'].tolist())), axis=1)\n", + " cosine_similarities = tf.reduce_sum(tf.multiply(sts_encode1, sts_encode2), axis=1)\n", + " clip_cosine_similarities = tf.clip_by_value(cosine_similarities, -1.0, 1.0)\n", + " scores = 1.0 - tf.acos(clip_cosine_similarities) / math.pi\n", + " \"\"\"Returns the similarity scores\"\"\"\n", + " return scores\n", + "\n", + "dev_scores = sts_data['sim'].tolist()\n", + "scores = []\n", + "for batch in np.array_split(sts_data, 10):\n", + " scores.extend(run_sts_benchmark(batch))\n", + "\n", + "pearson_correlation = scipy.stats.pearsonr(scores, dev_scores)\n", + "print('Pearson correlation coefficient = {0}\\np-value = {1}'.format(\n", + " pearson_correlation[0], pearson_correlation[1]))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "RUymE2l9GZfO" + ], + "name": "semantic_similarity_with_tf_hub_universal_encoder.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb new file mode 100644 index 00000000000..78d4eebadb0 --- /dev/null +++ b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb @@ -0,0 +1,537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IJhWonqQN7u0" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MegtYH2UN8tT" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MlHqSdgSEwPE" + }, + "source": [ + "# Universal Sentence Encoder-Lite demo\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j0HuiScHQ3OK" + }, + "source": [ + "This Colab illustrates how to use the Universal Sentence Encoder-Lite for sentence similarity task. This module is very similar to [Universal Sentence Encoder](https://www.tensorflow.org/hub/modules/google/universal-sentence-encoder/2) with the only difference that you need to run [SentencePiece](https://github.com/google/sentencepiece) processing on your input sentences.\n", + "\n", + "The Universal Sentence Encoder makes getting sentence level embeddings as easy as it has historically been to lookup the embeddings for individual words. The sentence embeddings can then be trivially used to compute sentence level meaning similarity as well as to enable better performance on downstream classification tasks using less supervised training data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wqCB2pyK-WSU" + }, + "source": [ + "# Getting started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWeEjoO5M0Cx" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5_potQBMzcU" + }, + "outputs": [], + "source": [ + "# Install seaborn for pretty visualizations\n", + "!pip3 install --quiet seaborn\n", + "# Install SentencePiece package\n", + "# SentencePiece package is needed for Universal Sentence Encoder Lite. We'll\n", + "# use it for all the text processing and sentence feature ID lookup.\n", + "!pip3 install --quiet sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dMTa6V4a-cmf" + }, + "outputs": [], + "source": [ + "from absl import logging\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "import sentencepiece as spm\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import re\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WPXYQDBiFJHd" + }, + "source": [ + "## Load the module from TF-Hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HEWUT-lmAkxM" + }, + "outputs": [], + "source": [ + "module = hub.Module(\"https://tfhub.dev/google/universal-sentence-encoder-lite/2\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5277Z-9qARYF" + }, + "outputs": [], + "source": [ + "input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None])\n", + "encodings = module(\n", + " inputs=dict(\n", + " values=input_placeholder.values,\n", + " indices=input_placeholder.indices,\n", + " dense_shape=input_placeholder.dense_shape))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yydbhuba_nek" + }, + "source": [ + "## Load SentencePiece model from the TF-Hub Module\n", + "The SentencePiece model is conveniently stored inside the module's assets. It has to be loaded in order to initialize the processor." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2CyUjKzE_tcJ" + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " spm_path = sess.run(module(signature=\"spm_path\"))\n", + "\n", + "sp = spm.SentencePieceProcessor()\n", + "with tf.io.gfile.GFile(spm_path, mode=\"rb\") as f:\n", + " sp.LoadFromSerializedProto(f.read())\n", + "print(\"SentencePiece model loaded at {}.\".format(spm_path))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6y5kkN-l-5QV" + }, + "outputs": [], + "source": [ + "def process_to_IDs_in_sparse_format(sp, sentences):\n", + " # An utility method that processes sentences with the sentence piece processor\n", + " # 'sp' and returns the results in tf.SparseTensor-similar format:\n", + " # (values, indices, dense_shape)\n", + " ids = [sp.EncodeAsIds(x) for x in sentences]\n", + " max_len = max(len(x) for x in ids)\n", + " dense_shape=(len(ids), max_len)\n", + " values=[item for sublist in ids for item in sublist]\n", + " indices=[[row,col] for row in range(len(ids)) for col in range(len(ids[row]))]\n", + " return (values, indices, dense_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVpHEWrPAdxR" + }, + "source": [ + "### Test the module with a few examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pSkjuGYoCBfU" + }, + "outputs": [], + "source": [ + "# Compute a representation for each message, showing various lengths supported.\n", + "word = \"Elephant\"\n", + "sentence = \"I am a sentence for which I would like to get its embedding.\"\n", + "paragraph = (\n", + " \"Universal Sentence Encoder embeddings also support short paragraphs. \"\n", + " \"There is no hard limit on how long the paragraph is. Roughly, the longer \"\n", + " \"the more 'diluted' the embedding will be.\")\n", + "messages = [word, sentence, paragraph]\n", + "\n", + "values, indices, dense_shape = process_to_IDs_in_sparse_format(sp, messages)\n", + "\n", + "# Reduce logging output.\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "with tf.Session() as session:\n", + " session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", + " message_embeddings = session.run(\n", + " encodings,\n", + " feed_dict={input_placeholder.values: values,\n", + " input_placeholder.indices: indices,\n", + " input_placeholder.dense_shape: dense_shape})\n", + "\n", + " for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):\n", + " print(\"Message: {}\".format(messages[i]))\n", + " print(\"Embedding size: {}\".format(len(message_embedding)))\n", + " message_embedding_snippet = \", \".join(\n", + " (str(x) for x in message_embedding[:3]))\n", + " print(\"Embedding: [{}, ...]\\n\".format(message_embedding_snippet))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "46jrIgHyFDz9" + }, + "source": [ + "# Semantic Textual Similarity (STS) task example\n", + "\n", + "The embeddings produced by the Universal Sentence Encoder are approximately normalized. The semantic similarity of two sentences can be trivially computed as the inner product of the encodings." 
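As a minimal sketch of that inner-product similarity (using a toy stand-in for the embedding matrix rather than real encoder output), entry `[i, j]` of `np.inner(features, features)` is the cosine similarity of sentences `i` and `j` when the rows are normalized; this is what the `plot_similarity` helper in the next cell visualizes as a heat map.

```python
import numpy as np

# Toy, already-normalized stand-in for a batch of sentence embeddings.
embeddings = np.array([[0.6, 0.8],
                       [0.8, 0.6],
                       [-0.8, 0.6]])

similarity_matrix = np.inner(embeddings, embeddings)
print(similarity_matrix)  # 3x3 matrix; diagonal entries are 1.0
```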
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIQudHgWBGSk" + }, + "outputs": [], + "source": [ + "def plot_similarity(labels, features, rotation):\n", + " corr = np.inner(features, features)\n", + " sns.set(font_scale=1.2)\n", + " g = sns.heatmap(\n", + " corr,\n", + " xticklabels=labels,\n", + " yticklabels=labels,\n", + " vmin=0,\n", + " vmax=1,\n", + " cmap=\"YlOrRd\")\n", + " g.set_xticklabels(labels, rotation=rotation)\n", + " g.set_title(\"Semantic Textual Similarity\")\n", + "\n", + "\n", + "def run_and_plot(session, input_placeholder, messages):\n", + " values, indices, dense_shape = process_to_IDs_in_sparse_format(sp,messages)\n", + "\n", + " message_embeddings = session.run(\n", + " encodings,\n", + " feed_dict={input_placeholder.values: values,\n", + " input_placeholder.indices: indices,\n", + " input_placeholder.dense_shape: dense_shape})\n", + " \n", + " plot_similarity(messages, message_embeddings, 90)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wlDqttNcE0Bx" + }, + "source": [ + "## Similarity visualized\n", + "Here we show the similarity in a heat map. The final graph is a 9x9 matrix where each entry `[i, j]` is colored based on the inner product of the encodings for sentence `i` and `j`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_GSCW5QIBKVe" + }, + "outputs": [], + "source": [ + "messages = [\n", + " # Smartphones\n", + " \"I like my phone\",\n", + " \"My phone is not good.\",\n", + " \"Your cellphone looks great.\",\n", + "\n", + " # Weather\n", + " \"Will it snow tomorrow?\",\n", + " \"Recently a lot of hurricanes have hit the US\",\n", + " \"Global warming is real\",\n", + "\n", + " # Food and health\n", + " \"An apple a day, keeps the doctors away\",\n", + " \"Eating strawberries is healthy\",\n", + " \"Is paleo better than keto?\",\n", + "\n", + " # Asking about age\n", + " \"How old are you?\",\n", + " \"what is your age?\",\n", + "]\n", + "\n", + "\n", + "with tf.Session() as session:\n", + " session.run(tf.global_variables_initializer())\n", + " session.run(tf.tables_initializer())\n", + " run_and_plot(session, input_placeholder, messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QkZ4sRBYBnL8" + }, + "source": [ + "## Evaluation: STS (Semantic Textual Similarity) Benchmark\n", + "\n", + "The [**STS Benchmark**](https://ixa2.si.ehu.es/stswiki/index.php/STSbenchmark) provides an intristic evaluation of the degree to which similarity scores computed using sentence embeddings align with human judgements. The benchmark requires systems to return similarity scores for a diverse selection of sentence pairs. [Pearson correlation](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) is then used to evaluate the quality of the machine similarity scores against human judgements." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kNMVfSelBsHW" + }, + "source": [ + "### Download data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8zAWVzBMBptq" + }, + "outputs": [], + "source": [ + "import pandas\n", + "import scipy\n", + "import math\n", + "\n", + "\n", + "def load_sts_dataset(filename):\n", + " # Loads a subset of the STS dataset into a DataFrame. 
In particular both\n", + " # sentences and their human rated similarity score.\n", + " sent_pairs = []\n", + " with tf.gfile.GFile(filename, \"r\") as f:\n", + " for line in f:\n", + " ts = line.strip().split(\"\\t\")\n", + " # (sent_1, sent_2, similarity_score)\n", + " sent_pairs.append((ts[5], ts[6], float(ts[4])))\n", + " return pandas.DataFrame(sent_pairs, columns=[\"sent_1\", \"sent_2\", \"sim\"])\n", + "\n", + "\n", + "def download_and_load_sts_data():\n", + " sts_dataset = tf.keras.utils.get_file(\n", + " fname=\"Stsbenchmark.tar.gz\",\n", + " origin=\"http://ixa2.si.ehu.es/stswiki/images/4/48/Stsbenchmark.tar.gz\",\n", + " extract=True)\n", + "\n", + " sts_dev = load_sts_dataset(\n", + " os.path.join(os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-dev.csv\"))\n", + " sts_test = load_sts_dataset(\n", + " os.path.join(\n", + " os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-test.csv\"))\n", + "\n", + " return sts_dev, sts_test\n", + "\n", + "\n", + "sts_dev, sts_test = download_and_load_sts_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l8lEawD6B4Fr" + }, + "source": [ + "### Build evaluation graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "etiZUkP-B6bR" + }, + "outputs": [], + "source": [ + "sts_input1 = tf.sparse_placeholder(tf.int64, shape=(None, None))\n", + "sts_input2 = tf.sparse_placeholder(tf.int64, shape=(None, None))\n", + "\n", + "# For evaluation we use exactly normalized rather than\n", + "# approximately normalized.\n", + "sts_encode1 = tf.nn.l2_normalize(\n", + " module(\n", + " inputs=dict(values=sts_input1.values,\n", + " indices=sts_input1.indices,\n", + " dense_shape=sts_input1.dense_shape)),\n", + " axis=1)\n", + "sts_encode2 = tf.nn.l2_normalize(\n", + " module(\n", + " inputs=dict(values=sts_input2.values,\n", + " indices=sts_input2.indices,\n", + " dense_shape=sts_input2.dense_shape)),\n", + " axis=1)\n", + "\n", + "sim_scores = -tf.acos(tf.reduce_sum(tf.multiply(sts_encode1, sts_encode2), axis=1))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4Q34ssLB-rw" + }, + "source": [ + "### Evaluate sentence embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-vRFEFPJPyeF" + }, + "outputs": [], + "source": [ + "#@title Choose dataset for benchmark\n", + "dataset = sts_dev #@param [\"sts_dev\", \"sts_test\"] {type:\"raw\"}\n", + "\n", + "values1, indices1, dense_shape1 = process_to_IDs_in_sparse_format(sp, dataset['sent_1'].tolist())\n", + "values2, indices2, dense_shape2 = process_to_IDs_in_sparse_format(sp, dataset['sent_2'].tolist())\n", + "similarity_scores = dataset['sim'].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_QJ2DI85CBDh" + }, + "outputs": [], + "source": [ + "def run_sts_benchmark(session):\n", + " \"\"\"Returns the similarity scores\"\"\"\n", + " scores = session.run(\n", + " sim_scores,\n", + " feed_dict={\n", + " sts_input1.values: values1,\n", + " sts_input1.indices: indices1,\n", + " sts_input1.dense_shape: dense_shape1,\n", + " sts_input2.values: values2,\n", + " sts_input2.indices: indices2,\n", + " sts_input2.dense_shape: dense_shape2,\n", + " })\n", + " return scores\n", + "\n", + "\n", + "with tf.Session() as session:\n", + " session.run(tf.global_variables_initializer())\n", + " session.run(tf.tables_initializer())\n", + " scores = run_sts_benchmark(session)\n", + "\n", + "pearson_correlation = 
scipy.stats.pearsonr(scores, similarity_scores)\n", + "print('Pearson correlation coefficient = {0}\\np-value = {1}'.format(\n", + " pearson_correlation[0], pearson_correlation[1]))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "IJhWonqQN7u0" + ], + "name": "semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb b/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb new file mode 100644 index 00000000000..c33dce64c92 --- /dev/null +++ b/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb @@ -0,0 +1,248 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "CGyzr0tfeUTQ" + }, + "source": [ + "**Copyright 2021 The TensorFlow Hub Authors.**\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zV1OQAGReaGQ" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L5bsDhkRfTpq" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owWqOcw1e-RZ" + }, + "source": [ + "# Universal Sentence Encoder SentEval demo\n", + "This colab demostrates the [Universal Sentence Encoder CMLM model](https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1) using the [SentEval](https://github.com/facebookresearch/SentEval) toolkit, which is a library for measuring the quality of sentence embeddings. The SentEval toolkit includes a diverse set of downstream tasks that are able to evaluate the generalization power of an embedding model and to evaluate the linguistic properties encoded.\n", + "\n", + "Run the first two code blocks to setup the environment, in the third code block you can pick a SentEval task to evaluate the model. A GPU runtime is recommended to run this Colab.\n", + "\n", + "To learn more about the Universal Sentence Encoder CMLM model, see https://openreview.net/forum?id=WDVD4lUCTzU." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-CerULCLsjzV" + }, + "outputs": [], + "source": [ + "#@title Install dependencies\n", + "!pip install --quiet \"tensorflow-text==2.11.*\"\n", + "!pip install --quiet torch==1.8.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LjqkqD6aiZGU" + }, + "source": [ + "## Download SentEval and task data\n", + "This step download SentEval from github and execute the data script to download the task data. It may take up to 5 minutes to complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3UwhHQiKJmSc" + }, + "outputs": [], + "source": [ + "#@title Install SentEval and download task data\n", + "!rm -rf ./SentEval\n", + "!git clone https://github.com/facebookresearch/SentEval.git\n", + "!cd $PWD/SentEval/data/downstream && bash get_transfer_data.bash > /dev/null 2>&1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7a2ohPn8vMe2" + }, + "source": [ + "#Execute a SentEval evaluation task\n", + "The following code block executes a SentEval task and output the results, choose one of the following tasks to evaluate the USE CMLM model:\n", + "\n", + "```\n", + "MR\tCR\tSUBJ\tMPQA\tSST\tTREC\tMRPC\tSICK-E\n", + "```\n", + "\n", + "Select a model, params and task to run. 
The rapid prototyping params can be used for reducing computation time for faster result.\n", + "\n", + "It typically takes 5-15 mins to complete a task with the **'rapid prototyping'** params and up to an hour with the **'slower, best performance'** params.\n", + "\n", + "```\n", + "params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}\n", + "params['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,\n", + " 'tenacity': 3, 'epoch_size': 2}\n", + "```\n", + "\n", + "For better result, use the slower **'slower, best performance'** params, computation may take up to 1 hour:\n", + "\n", + "```\n", + "params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 10}\n", + "params['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 16,\n", + " 'tenacity': 5, 'epoch_size': 6}\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nenCcawjwowt" + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", + "\n", + "import sys\n", + "sys.path.append(f'{os.getcwd()}/SentEval')\n", + "\n", + "import tensorflow as tf\n", + "\n", + "# Prevent TF from claiming all GPU memory so there is some left for pytorch.\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " # Memory growth needs to be the same across GPUs.\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + "\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text\n", + "import senteval\n", + "import time\n", + "\n", + "PATH_TO_DATA = f'{os.getcwd()}/SentEval/data'\n", + "MODEL = 'https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1' #@param ['https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1', 'https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-large/1']\n", + "PARAMS = 'rapid prototyping' #@param ['slower, best performance', 'rapid prototyping']\n", + "TASK = 'CR' #@param ['CR','MR', 'MPQA', 'MRPC', 'SICKEntailment', 'SNLI', 'SST2', 'SUBJ', 'TREC']\n", + "\n", + "params_prototyping = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}\n", + "params_prototyping['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,\n", + " 'tenacity': 3, 'epoch_size': 2}\n", + "\n", + "params_best = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 10}\n", + "params_best['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 16,\n", + " 'tenacity': 5, 'epoch_size': 6}\n", + "\n", + "params = params_best if PARAMS == 'slower, best performance' else params_prototyping\n", + "\n", + "preprocessor = hub.KerasLayer(\n", + " \"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3\")\n", + "encoder = hub.KerasLayer(\n", + " \"https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1\")\n", + "\n", + "inputs = tf.keras.Input(shape=tf.shape(''), dtype=tf.string)\n", + "outputs = encoder(preprocessor(inputs))\n", + "\n", + "model = tf.keras.Model(inputs=inputs, outputs=outputs)\n", + "\n", + "def prepare(params, samples):\n", + " return\n", + "\n", + "def batcher(_, batch):\n", + " batch = [' '.join(sent) if sent else '.' for sent in batch]\n", + " return model.predict(tf.constant(batch))[\"default\"]\n", + "\n", + "\n", + "se = senteval.engine.SE(params, batcher, prepare)\n", + "print(\"Evaluating task %s with %s parameters\" % (TASK, PARAMS))\n", + "start = time.time()\n", + "results = se.eval(TASK)\n", + "end = time.time()\n", + "print('Time took on task %s : %.1f. 
seconds' % (TASK, end - start))\n", + "print(results)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SNvsY6Hsvs0_" + }, + "source": [ + "#Learn More\n", + "\n", + "* Find more text embedding models on [TensorFlow Hub](https://tfhub.dev)\n", + "* See also the [Multilingual Universal Sentence Encoder CMLM model](https://tfhub.dev/google/universal-sentence-encoder-cmlm/multilingual-base-br/1)\n", + "* Check out other [Universal Sentence Encoder models](https://tfhub.dev/google/collections/universal-sentence-encoder/1)\n", + "\n", + "## Reference\n", + "\n", + "* Ziyi Yang, Yinfei Yang, Daniel Cer, Jax Law, Eric Darve. [Universal Sentence Representations Learning with Conditional Masked Language Model. November 2020](https://openreview.net/forum?id=WDVD4lUCTzU)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "senteval_for_universal_sentence_encoder_cmlm.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/spice.ipynb b/site/en/hub/tutorials/spice.ipynb new file mode 100644 index 00000000000..9ff6cd3bd62 --- /dev/null +++ b/site/en/hub/tutorials/spice.ipynb @@ -0,0 +1,937 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "aXehiGc3Kr2I" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-6LKjmi8Ktoh" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sPQKw4x4bL8w" + }, + "source": [ + "# Pitch Detection with SPICE\n", + "\n", + "This colab will show you how to use the SPICE model downloaded from TensorFlow Hub." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rfKwZlPnPwD1" + }, + "outputs": [], + "source": [ + "!sudo apt-get install -q -y timidity libsndfile1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dYrIdOS8SW3b" + }, + "outputs": [], + "source": [ + "# All the imports to deal with sound data\n", + "!pip install pydub librosa music21" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p09o78LGYdnz" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import librosa\n", + "from librosa import display as librosadisplay\n", + "\n", + "import logging\n", + "import math\n", + "import statistics\n", + "import sys\n", + "\n", + "from IPython.display import Audio, Javascript\n", + "from scipy.io import wavfile\n", + "\n", + "from base64 import b64decode\n", + "\n", + "import music21\n", + "from pydub import AudioSegment\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.ERROR)\n", + "\n", + "print(\"tensorflow: %s\" % tf.__version__)\n", + "#print(\"librosa: %s\" % librosa.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wHxox8hXc3w1" + }, + "source": [ + "# The audio input file\n", + "Now the hardest part: Record your singing! :)\n", + "\n", + "We provide four methods to obtain an audio file:\n", + "\n", + "1. Record audio directly in colab\n", + "2. Upload from your computer\n", + "3. Use a file saved on Google Drive\n", + "4. Download the file from the web\n", + "\n", + "Choose one of the four methods below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "HaCAHOqiVu5B" + }, + "outputs": [], + "source": [ + "#@title [Run this] Definition of the JS code to record audio straight from the browser\n", + "\n", + "RECORD = \"\"\"\n", + "const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n", + "const b2text = blob => new Promise(resolve => {\n", + " const reader = new FileReader()\n", + " reader.onloadend = e => resolve(e.srcElement.result)\n", + " reader.readAsDataURL(blob)\n", + "})\n", + "var record = time => new Promise(async resolve => {\n", + " stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n", + " recorder = new MediaRecorder(stream)\n", + " chunks = []\n", + " recorder.ondataavailable = e => chunks.push(e.data)\n", + " recorder.start()\n", + " await sleep(time)\n", + " recorder.onstop = async ()=>{\n", + " blob = new Blob(chunks)\n", + " text = await b2text(blob)\n", + " resolve(text)\n", + " }\n", + " recorder.stop()\n", + "})\n", + "\"\"\"\n", + "\n", + "def record(sec=5):\n", + " try:\n", + " from google.colab import output\n", + " except ImportError:\n", + " print('No possible to import output from google.colab')\n", + " return ''\n", + " else:\n", + " print('Recording')\n", + " display(Javascript(RECORD))\n", + " s = output.eval_js('record(%d)' % (sec*1000))\n", + " fname = 'recorded_audio.wav'\n", + " print('Saving to', fname)\n", + " b = b64decode(s.split(',')[1])\n", + " with open(fname, 'wb') as f:\n", + " f.write(b)\n", + " return fname" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "sBpWWkTzfUYR" + }, + "outputs": [], + "source": [ + "#@title Select how to input your audio { run: \"auto\" }\n", + "INPUT_SOURCE = 'https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav' #@param [\"https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav\", \"RECORD\", \"UPLOAD\", \"./drive/My Drive/YOUR_MUSIC_FILE.wav\"] {allow-input: true}\n", + "\n", + "print('You selected', INPUT_SOURCE)\n", + "\n", + "if INPUT_SOURCE == 'RECORD':\n", + " uploaded_file_name = record(5)\n", + "elif INPUT_SOURCE == 'UPLOAD':\n", + " try:\n", + " from google.colab import files\n", + " except ImportError:\n", + " print(\"ImportError: files from google.colab seems to not be available\")\n", + " else:\n", + " uploaded = files.upload()\n", + " for fn in uploaded.keys():\n", + " print('User uploaded file \"{name}\" with length {length} bytes'.format(\n", + " name=fn, length=len(uploaded[fn])))\n", + " uploaded_file_name = next(iter(uploaded))\n", + " print('Uploaded file: ' + uploaded_file_name)\n", + "elif INPUT_SOURCE.startswith('./drive/'):\n", + " try:\n", + " from google.colab import drive\n", + " except ImportError:\n", + " print(\"ImportError: files from google.colab seems to not be available\")\n", + " else:\n", + " drive.mount('/content/drive')\n", + " # don't forget to change the name of the file you\n", + " # will you here!\n", + " gdrive_audio_file = 'YOUR_MUSIC_FILE.wav'\n", + " uploaded_file_name = INPUT_SOURCE\n", + "elif INPUT_SOURCE.startswith('http'):\n", + " !wget --no-check-certificate 'https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav' -O c-scale.wav\n", + " uploaded_file_name = 'c-scale.wav'\n", + "else:\n", + " print('Unrecognized input format!')\n", + " print('Please select \"RECORD\", \"UPLOAD\", or specify a file hosted on Google Drive or a file from 
the web to download')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4S2BvIoDf9nf" + }, + "source": [ + "# Preparing the audio data\n", + "\n", + "Now that we have the audio, let's convert it to the expected format and then listen to it!\n", + "\n", + "The SPICE model expects as input an audio file at a sampling rate of 16kHz with only one channel (mono). \n", + "\n", + "To help you with this part, we created a function (`convert_audio_for_model`) to convert any wav file you have to the model's expected format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bQ1362i-JoFI" + }, + "outputs": [], + "source": [ + "# Function that converts the user-created audio to the format that the model \n", + "# expects: a 16kHz sampling rate and only one channel (mono).\n", + "\n", + "EXPECTED_SAMPLE_RATE = 16000\n", + "\n", + "def convert_audio_for_model(user_file, output_file='converted_audio_file.wav'):\n", + " audio = AudioSegment.from_file(user_file)\n", + " audio = audio.set_frame_rate(EXPECTED_SAMPLE_RATE).set_channels(1)\n", + " audio.export(output_file, format=\"wav\")\n", + " return output_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oL9pftZ2nPm9" + }, + "outputs": [], + "source": [ + "# Convert to the expected format for the model.\n", + "# In all 4 input methods above, the uploaded file name is stored in\n", + "# the variable uploaded_file_name.\n", + "converted_audio_file = convert_audio_for_model(uploaded_file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TslkX2AOZN0p" + }, + "outputs": [], + "source": [ + "# Load audio samples from the wav file:\n", + "sample_rate, audio_samples = wavfile.read(converted_audio_file, 'rb')\n", + "\n", + "# Show some basic information about the audio.\n", + "duration = len(audio_samples)/sample_rate\n", + "print(f'Sample rate: {sample_rate} Hz')\n", + "print(f'Total duration: {duration:.2f}s')\n", + "print(f'Size of the input: {len(audio_samples)}')\n", + "\n", + "# Let's listen to the wav file.\n", + "Audio(audio_samples, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iBicZu5AgcpR" + }, + "source": [ + "First, let's take a look at the waveform of our singing."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAa2M3CLZcWW" + }, + "outputs": [], + "source": [ + "# We can visualize the audio as a waveform.\n", + "_ = plt.plot(audio_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J1eI0b8qgn08" + }, + "source": [ + "A more informative visualization is the [spectrogram](https://en.wikipedia.org/wiki/Spectrogram), which shows frequencies present over time.\n", + "\n", + "Here, we use a logarithmic frequency scale, to make the singing more clearly visible.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fGR4UZtpZvWI" + }, + "outputs": [], + "source": [ + "MAX_ABS_INT16 = 32768.0\n", + "\n", + "def plot_stft(x, sample_rate, show_black_and_white=False):\n", + " x_stft = np.abs(librosa.stft(x, n_fft=2048))\n", + " fig, ax = plt.subplots()\n", + " fig.set_size_inches(20, 10)\n", + " x_stft_db = librosa.amplitude_to_db(x_stft, ref=np.max)\n", + " if(show_black_and_white):\n", + " librosadisplay.specshow(data=x_stft_db, y_axis='log', \n", + " sr=sample_rate, cmap='gray_r')\n", + " else:\n", + " librosadisplay.specshow(data=x_stft_db, y_axis='log', sr=sample_rate)\n", + "\n", + " plt.colorbar(format='%+2.0f dB')\n", + "\n", + "plot_stft(audio_samples / MAX_ABS_INT16 , sample_rate=EXPECTED_SAMPLE_RATE)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MGCzo_cjjH-7" + }, + "source": [ + "We need one last conversion here. The audio samples are in int16 format. They need to be normalized to floats between -1 and 1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dv4H4O1Xb8T8" + }, + "outputs": [], + "source": [ + "audio_samples = audio_samples / float(MAX_ABS_INT16)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yTdo_TwljVUV" + }, + "source": [ + "# Executing the Model\n", + "Now is the easy part, let's load the model with **TensorFlow Hub**, and feed the audio to it.\n", + "SPICE will give us two outputs: pitch and uncertainty\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xUptYSTAbc3I" + }, + "source": [ + "**TensorFlow Hub** is a library for the publication, discovery, and consumption of reusable parts of machine learning models. It makes easy to use machine learning to solve your challenges.\n", + "\n", + "To load the model you just need the Hub module and the URL pointing to the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ri0A0DSXY_Yd" + }, + "outputs": [], + "source": [ + "# Loading the SPICE model is easy:\n", + "model = hub.load(\"https://tfhub.dev/google/spice/2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kQV5H6J4suMT" + }, + "source": [ + "**Note:** An interesting detail here is that all the model urls from Hub can be used for download and also to read the documentation, so if you point your browser to that link you can read documentation on how to use the model and learn more about how it was trained." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GUVICjIps9hI" + }, + "source": [ + "With the model loaded, data prepared, we need 3 lines to get the result: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tP55fXBYcBhb" + }, + "outputs": [], + "source": [ + "# We now feed the audio to the SPICE tf.hub model to obtain pitch and uncertainty outputs as tensors.\n", + "model_output = model.signatures[\"serving_default\"](tf.constant(audio_samples, tf.float32))\n", + "\n", + "pitch_outputs = model_output[\"pitch\"]\n", + "uncertainty_outputs = model_output[\"uncertainty\"]\n", + "\n", + "# 'Uncertainty' basically means the inverse of confidence.\n", + "confidence_outputs = 1.0 - uncertainty_outputs\n", + "\n", + "fig, ax = plt.subplots()\n", + "fig.set_size_inches(20, 10)\n", + "plt.plot(pitch_outputs, label='pitch')\n", + "plt.plot(confidence_outputs, label='confidence')\n", + "plt.legend(loc=\"lower right\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "blJwFWR4kMul" + }, + "source": [ + "Let's make the results easier to understand by removing all pitch estimates with low confidence (confidence < 0.9) and plot the remaining ones.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d1MRmcm2cEkM" + }, + "outputs": [], + "source": [ + "confidence_outputs = list(confidence_outputs)\n", + "pitch_outputs = [ float(x) for x in pitch_outputs]\n", + "\n", + "indices = range(len (pitch_outputs))\n", + "confident_pitch_outputs = [ (i,p) \n", + " for i, p, c in zip(indices, pitch_outputs, confidence_outputs) if c >= 0.9 ]\n", + "confident_pitch_outputs_x, confident_pitch_outputs_y = zip(*confident_pitch_outputs)\n", + " \n", + "fig, ax = plt.subplots()\n", + "fig.set_size_inches(20, 10)\n", + "ax.set_ylim([0, 1])\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_outputs_y, )\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_outputs_y, c=\"r\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vNBZ7ZblkxOm" + }, + "source": [ + "The pitch values returned by SPICE are in the range from 0 to 1. Let's convert them to absolute pitch values in Hz." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n-CnpKzmcQi9" + }, + "outputs": [], + "source": [ + "def output2hz(pitch_output):\n", + " # Constants taken from https://tfhub.dev/google/spice/2\n", + " PT_OFFSET = 25.58\n", + " PT_SLOPE = 63.07\n", + " FMIN = 10.0;\n", + " BINS_PER_OCTAVE = 12.0;\n", + " cqt_bin = pitch_output * PT_SLOPE + PT_OFFSET;\n", + " return FMIN * 2.0 ** (1.0 * cqt_bin / BINS_PER_OCTAVE)\n", + " \n", + "confident_pitch_values_hz = [ output2hz(p) for p in confident_pitch_outputs_y ]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "24yK0a6HjCSZ" + }, + "source": [ + "Now, let's see how good the prediction is: We will overlay the predicted pitches over the original spectrogram. To make the pitch predictions more visible, we changed the spectrogram to black and white." 
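Before plotting the overlay, here is a quick numeric sanity check of the `output2hz` conversion defined above, using a hypothetical model output of 0.5 and the same constants:

```python
# Worked example of the pitch-to-Hz mapping used by output2hz above.
PT_OFFSET = 25.58
PT_SLOPE = 63.07
FMIN = 10.0
BINS_PER_OCTAVE = 12.0

pitch_output = 0.5                                    # hypothetical SPICE output in [0, 1]
cqt_bin = pitch_output * PT_SLOPE + PT_OFFSET         # 57.115
freq_hz = FMIN * 2.0 ** (cqt_bin / BINS_PER_OCTAVE)   # about 271 Hz
print(freq_hz)
```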
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L1kaAcX9rrDo" + }, + "outputs": [], + "source": [ + "plot_stft(audio_samples / MAX_ABS_INT16 , \n", + " sample_rate=EXPECTED_SAMPLE_RATE, show_black_and_white=True)\n", + "# Note: conveniently, since the plot is in log scale, the pitch outputs \n", + "# also get converted to the log scale automatically by matplotlib.\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_values_hz, c=\"r\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NskqpiHLxq6V" + }, + "source": [ + "# Converting to musical notes\n", + "\n", + "Now that we have the pitch values, let's convert them to notes!\n", + "This is part is challenging by itself. We have to take into account two things:\n", + "1. the rests (when there's no singing) \n", + "2. the size of each note (offsets) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KDOlm9PLTTjt" + }, + "source": [ + "### 1: Adding zeros to the output to indicate when there's no singing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9uSQ3bJmTZmo" + }, + "outputs": [], + "source": [ + "pitch_outputs_and_rests = [\n", + " output2hz(p) if c >= 0.9 else 0\n", + " for i, p, c in zip(indices, pitch_outputs, confidence_outputs)\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9fM0UwlsTt4w" + }, + "source": [ + "### 2: Adding note offsets\n", + "\n", + "When a person sings freely, the melody may have an offset to the absolute pitch values that notes can represent.\n", + "Hence, to convert predictions to notes, one needs to correct for this possible offset.\n", + "This is what the following code computes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fsJu-P5ksdFW" + }, + "outputs": [], + "source": [ + "A4 = 440\n", + "C0 = A4 * pow(2, -4.75)\n", + "note_names = [\"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\", \"A\", \"A#\", \"B\"]\n", + "\n", + "def hz2offset(freq):\n", + " # This measures the quantization error for a single note.\n", + " if freq == 0: # Rests always have zero error.\n", + " return None\n", + " # Quantized note.\n", + " h = round(12 * math.log2(freq / C0))\n", + " return 12 * math.log2(freq / C0) - h\n", + "\n", + "\n", + "# The ideal offset is the mean quantization error for all the notes\n", + "# (excluding rests):\n", + "offsets = [hz2offset(p) for p in pitch_outputs_and_rests if p != 0]\n", + "print(\"offsets: \", offsets)\n", + "\n", + "ideal_offset = statistics.mean(offsets)\n", + "print(\"ideal offset: \", ideal_offset)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K17It_qT2DtE" + }, + "source": [ + "We can now use some heuristics to try and estimate the most likely sequence of notes that were sung.\n", + "The ideal offset computed above is one ingredient - but we also need to know the speed (how many predictions make, say, an eighth?), and the time offset to start quantizing. To keep it simple, we'll just try different speeds and time offsets and measure the quantization error, using in the end the values that minimize this error." 
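As a minimal illustration of the note-naming step that this search builds on (ignoring the `ideal_offset` correction and reusing the `C0` and `note_names` definitions from the previous cell; the input frequency is just an example), a pitch of about 261.6 Hz quantizes to C4:

```python
import math

A4 = 440
C0 = A4 * pow(2, -4.75)
note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

freq = 261.63                           # hypothetical pitch in Hz
h = round(12 * math.log2(freq / C0))    # semitone index above C0 -> 48
octave, n = h // 12, h % 12
print(note_names[n] + str(octave))      # "C4"
```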
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eMULTI4L52ZHA" + }, + "outputs": [], + "source": [ + "def quantize_predictions(group, ideal_offset):\n", + " # Group values are either 0, or a pitch in Hz.\n", + " non_zero_values = [v for v in group if v != 0]\n", + " zero_values_count = len(group) - len(non_zero_values)\n", + "\n", + " # Create a rest if 80% is silent, otherwise create a note.\n", + " if zero_values_count > 0.8 * len(group):\n", + " # Interpret as a rest. Count each dropped note as an error, weighted a bit\n", + " # worse than a badly sung note (which would 'cost' 0.5).\n", + " return 0.51 * len(non_zero_values), \"Rest\"\n", + " else:\n", + " # Interpret as note, estimating as mean of non-rest predictions.\n", + " h = round(\n", + " statistics.mean([\n", + " 12 * math.log2(freq / C0) - ideal_offset for freq in non_zero_values\n", + " ]))\n", + " octave = h // 12\n", + " n = h % 12\n", + " note = note_names[n] + str(octave)\n", + " # Quantization error is the total difference from the quantized note.\n", + " error = sum([\n", + " abs(12 * math.log2(freq / C0) - ideal_offset - h)\n", + " for freq in non_zero_values\n", + " ])\n", + " return error, note\n", + "\n", + "\n", + "def get_quantization_and_error(pitch_outputs_and_rests, predictions_per_eighth,\n", + " prediction_start_offset, ideal_offset):\n", + " # Apply the start offset - we can just add the offset as rests.\n", + " pitch_outputs_and_rests = [0] * prediction_start_offset + \\\n", + " pitch_outputs_and_rests\n", + " # Collect the predictions for each note (or rest).\n", + " groups = [\n", + " pitch_outputs_and_rests[i:i + predictions_per_eighth]\n", + " for i in range(0, len(pitch_outputs_and_rests), predictions_per_eighth)\n", + " ]\n", + "\n", + " quantization_error = 0\n", + "\n", + " notes_and_rests = []\n", + " for group in groups:\n", + " error, note_or_rest = quantize_predictions(group, ideal_offset)\n", + " quantization_error += error\n", + " notes_and_rests.append(note_or_rest)\n", + "\n", + " return quantization_error, notes_and_rests\n", + "\n", + "\n", + "best_error = float(\"inf\")\n", + "best_notes_and_rests = None\n", + "best_predictions_per_note = None\n", + "\n", + "for predictions_per_note in range(20, 65, 1):\n", + " for prediction_start_offset in range(predictions_per_note):\n", + "\n", + " error, notes_and_rests = get_quantization_and_error(\n", + " pitch_outputs_and_rests, predictions_per_note,\n", + " prediction_start_offset, ideal_offset)\n", + "\n", + " if error < best_error: \n", + " best_error = error\n", + " best_notes_and_rests = notes_and_rests\n", + " best_predictions_per_note = predictions_per_note\n", + "\n", + "# At this point, best_notes_and_rests contains the best quantization.\n", + "# Since we don't need to have rests at the beginning, let's remove these:\n", + "while best_notes_and_rests[0] == 'Rest':\n", + " best_notes_and_rests = best_notes_and_rests[1:]\n", + "# Also remove silence at the end.\n", + "while best_notes_and_rests[-1] == 'Rest':\n", + " best_notes_and_rests = best_notes_and_rests[:-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vMZbWA3aVqee" + }, + "source": [ + "Now let's write the quantized notes as sheet music score!\n", + "\n", + "To do it we will use two libraries: [music21](http://web.mit.edu/music21/) and [Open Sheet Music Display](https://github.com/opensheetmusicdisplay/opensheetmusicdisplay)\n", + "\n", + "**Note:** for simplicity, we assume here that all notes have the same 
duration (a half note)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yVrk_IOIzpQR" + }, + "outputs": [], + "source": [ + "# Creating the sheet music score.\n", + "sc = music21.stream.Score()\n", + "# Adjust the speed to match the actual singing.\n", + "bpm = 60 * 60 / best_predictions_per_note\n", + "print ('bpm: ', bpm)\n", + "a = music21.tempo.MetronomeMark(number=bpm)\n", + "sc.insert(0,a)\n", + "\n", + "for snote in best_notes_and_rests: \n", + " d = 'half'\n", + " if snote == 'Rest': \n", + " sc.append(music21.note.Rest(type=d))\n", + " else:\n", + " sc.append(music21.note.Note(snote, type=d))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "CEleCWHtG2s4" + }, + "outputs": [], + "source": [ + "#@title [Run this] Helper function to use Open Sheet Music Display (JS code) to show a music score\n", + "\n", + "from IPython.core.display import display, HTML, Javascript\n", + "import json, random\n", + "\n", + "def showScore(score):\n", + " xml = open(score.write('musicxml')).read()\n", + " showMusicXML(xml)\n", + " \n", + "def showMusicXML(xml):\n", + " DIV_ID = \"OSMD_div\"\n", + " display(HTML('
<div id=\"'+DIV_ID+'\">loading OpenSheetMusicDisplay</div>
    '))\n", + " script = \"\"\"\n", + " var div_id = %%DIV_ID%%;\n", + " function loadOSMD() { \n", + " return new Promise(function(resolve, reject){\n", + " if (window.opensheetmusicdisplay) {\n", + " return resolve(window.opensheetmusicdisplay)\n", + " }\n", + " // OSMD script has a 'define' call which conflicts with requirejs\n", + " var _define = window.define // save the define object \n", + " window.define = undefined // now the loaded script will ignore requirejs\n", + " var s = document.createElement( 'script' );\n", + " s.setAttribute( 'src', \"https://cdn.jsdelivr.net/npm/opensheetmusicdisplay@0.7.6/build/opensheetmusicdisplay.min.js\" );\n", + " //s.setAttribute( 'src', \"/custom/opensheetmusicdisplay.js\" );\n", + " s.onload=function(){\n", + " window.define = _define\n", + " resolve(opensheetmusicdisplay);\n", + " };\n", + " document.body.appendChild( s ); // browser will try to load the new script tag\n", + " }) \n", + " }\n", + " loadOSMD().then((OSMD)=>{\n", + " window.openSheetMusicDisplay = new OSMD.OpenSheetMusicDisplay(div_id, {\n", + " drawingParameters: \"compacttight\"\n", + " });\n", + " openSheetMusicDisplay\n", + " .load(%%data%%)\n", + " .then(\n", + " function() {\n", + " openSheetMusicDisplay.render();\n", + " }\n", + " );\n", + " })\n", + " \"\"\".replace('%%DIV_ID%%',DIV_ID).replace('%%data%%',json.dumps(xml))\n", + " display(Javascript(script))\n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WTu4phq4WeAI" + }, + "outputs": [], + "source": [ + "# rendering the music score\n", + "showScore(sc)\n", + "print(best_notes_and_rests)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fGPXm6Z83U2g" + }, + "source": [ + "Let's convert the music notes to a MIDI file and listen to it.\n", + "\n", + "To create this file, we can use the stream we created before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "klYoWjgmPaod" + }, + "outputs": [], + "source": [ + "# Saving the recognized musical notes as a MIDI file\n", + "converted_audio_file_as_midi = converted_audio_file[:-4] + '.mid'\n", + "fp = sc.write('midi', fp=converted_audio_file_as_midi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tz7Mj3Qx1lpR" + }, + "outputs": [], + "source": [ + "wav_from_created_midi = converted_audio_file_as_midi.replace(' ', '_') + \"_midioutput.wav\"\n", + "print(wav_from_created_midi)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ahss5EOiWDDp" + }, + "source": [ + "To listen to it on colab, we need to convert it back to wav. An easy way of doing that is using Timidity." 
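As a rough sketch of the same conversion done from plain Python instead of a shell cell, one could call TiMidity through `subprocess`; this assumes the `timidity` binary is installed in the environment and simply reuses the `-Ow -o` flags of the shell command below.

```python
# Sketch only: convert a MIDI file to WAV by shelling out to TiMidity.
# Assumes `timidity` is installed (e.g. `apt-get install -y timidity`) and that
# `converted_audio_file_as_midi` / `wav_from_created_midi` are defined above.
import subprocess

def midi_to_wav(midi_path, wav_path):
    # -Ow writes RIFF WAVE output, -o sets the output file name.
    subprocess.run(["timidity", midi_path, "-Ow", "-o", wav_path], check=True)
```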
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XmeJ-UITV2nq" + }, + "outputs": [], + "source": [ + "!timidity $converted_audio_file_as_midi -Ow -o $wav_from_created_midi" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bnvwmyNj7kCC" + }, + "source": [ + "And finally, listen the audio, created from notes, created via MIDI from the predicted pitches, inferred by the model!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qNLBB0zJV6vN" + }, + "outputs": [], + "source": [ + "Audio(wav_from_created_midi)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "spice.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb b/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb new file mode 100644 index 00000000000..e2985bda51e --- /dev/null +++ b/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ok9PfyoQ2rH_" + }, + "source": [ + "# How to solve a problem on Kaggle with TF-Hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "556YQZLUO4Ih" + }, + "source": [ + "TF-Hub is a platform to share machine learning expertise packaged in reusable resources, notably pre-trained **modules**. In this tutorial, we will use a TF-Hub text embedding module to train a simple sentiment classifier with a reasonable baseline accuracy. We will then submit the predictions to Kaggle.\n", + "\n", + "For more detailed tutorial on text classification with TF-Hub and further steps for improving the accuracy, take a look at [Text classification with TF-Hub](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9KyLct9rq0lo" + }, + "outputs": [], + "source": [ + "!pip install -q kaggle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v7hy0bhngTUp" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import zipfile\n", + "\n", + "from sklearn import model_selection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JvgBdeMsuu_3" + }, + "source": [ + "Since this tutorial will be using a dataset from Kaggle, it requires [creating an API Token](https://github.com/Kaggle/kaggle-api) for your Kaggle account, and uploading it to the Colab environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nI7C-Zc4urOH" + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "# Upload the API token.\n", + "def get_kaggle():\n", + " try:\n", + " import kaggle\n", + " return kaggle\n", + " except OSError:\n", + " pass\n", + "\n", + " token_file = pathlib.Path(\"~/.kaggle/kaggle.json\").expanduser()\n", + " token_file.parent.mkdir(exist_ok=True, parents=True)\n", + "\n", + " try:\n", + " from google.colab import files\n", + " except ImportError:\n", + " raise ValueError(\"Could not find kaggle token.\")\n", + "\n", + " uploaded = files.upload()\n", + " token_content = uploaded.get('kaggle.json', None)\n", + " if token_content:\n", + " token_file.write_bytes(token_content)\n", + " token_file.chmod(0o600)\n", + " else:\n", + " raise ValueError('Need a file named \"kaggle.json\"')\n", + " \n", + " import kaggle\n", + " return kaggle\n", + "\n", + "\n", + "kaggle = get_kaggle()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6OPyVxHuiTEE" + }, + "source": [ + "# Getting started\n", + "\n", + "## Data\n", + "We will try to solve the [Sentiment Analysis on Movie Reviews](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/data) task from Kaggle. The dataset consists of syntactic subphrases of the Rotten Tomatoes movie reviews. 
The task is to label the phrases as **negative** or **positive** on the scale from 1 to 5.\n", + "\n", + "You must [accept the competition rules](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/data) before you can use the API to download the data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "rKzc-fOGV72G" + }, + "outputs": [], + "source": [ + "SENTIMENT_LABELS = [\n", + " \"negative\", \"somewhat negative\", \"neutral\", \"somewhat positive\", \"positive\"\n", + "]\n", + "\n", + "# Add a column with readable values representing the sentiment.\n", + "def add_readable_labels_column(df, sentiment_value_column):\n", + " df[\"SentimentLabel\"] = df[sentiment_value_column].replace(\n", + " range(5), SENTIMENT_LABELS)\n", + " \n", + "# Download data from Kaggle and create a DataFrame.\n", + "def load_data_from_zip(path):\n", + " with zipfile.ZipFile(path, \"r\") as zip_ref:\n", + " name = zip_ref.namelist()[0]\n", + " with zip_ref.open(name) as zf:\n", + " return pd.read_csv(zf, sep=\"\\t\", index_col=0)\n", + "\n", + "\n", + "# The data does not come with a validation set so we'll create one from the\n", + "# training set.\n", + "def get_data(competition, train_file, test_file, validation_set_ratio=0.1):\n", + " data_path = pathlib.Path(\"data\")\n", + " kaggle.api.competition_download_files(competition, data_path)\n", + " competition_path = (data_path/competition)\n", + " competition_path.mkdir(exist_ok=True, parents=True)\n", + " competition_zip_path = competition_path.with_suffix(\".zip\")\n", + "\n", + " with zipfile.ZipFile(competition_zip_path, \"r\") as zip_ref:\n", + " zip_ref.extractall(competition_path)\n", + " \n", + " train_df = load_data_from_zip(competition_path/train_file)\n", + " test_df = load_data_from_zip(competition_path/test_file)\n", + "\n", + " # Add a human readable label.\n", + " add_readable_labels_column(train_df, \"Sentiment\")\n", + "\n", + " # We split by sentence ids, because we don't want to have phrases belonging\n", + " # to the same sentence in both training and validation set.\n", + " train_indices, validation_indices = model_selection.train_test_split(\n", + " np.unique(train_df[\"SentenceId\"]),\n", + " test_size=validation_set_ratio,\n", + " random_state=0)\n", + "\n", + " validation_df = train_df[train_df[\"SentenceId\"].isin(validation_indices)]\n", + " train_df = train_df[train_df[\"SentenceId\"].isin(train_indices)]\n", + " print(\"Split the training data into %d training and %d validation examples.\" %\n", + " (len(train_df), len(validation_df)))\n", + "\n", + " return train_df, validation_df, test_df\n", + "\n", + "\n", + "train_df, validation_df, test_df = get_data(\n", + " \"sentiment-analysis-on-movie-reviews\",\n", + " \"train.tsv.zip\", \"test.tsv.zip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DFq_EyS1BEyK" + }, + "source": [ + "Note: In this competition the task is not to rate entire reviews, but individual phrases from within the reviews. This is a much harder task." 
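To make the sentence-level split above concrete, here is a minimal sketch with toy data (not part of the competition files) showing one way to keep all phrases from the same sentence on one side of the split; `GroupShuffleSplit` is an alternative to the manual split on unique `SentenceId` values used above.

```python
# Toy illustration of a leakage-free split: phrases that share a SentenceId
# never end up in both the training and the validation set.
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit

toy_df = pd.DataFrame({
    "SentenceId": [1, 1, 2, 2, 2, 3],
    "Phrase": ["a fine film", "fine film", "dull plot", "plot", "dull", "great"],
    "Sentiment": [3, 3, 1, 2, 1, 4],
})

splitter = GroupShuffleSplit(n_splits=1, test_size=0.34, random_state=0)
train_idx, valid_idx = next(splitter.split(toy_df, groups=toy_df["SentenceId"]))
print(toy_df.iloc[train_idx], toy_df.iloc[valid_idx], sep="\n\n")
```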
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "42hgsiWNq5y9" + }, + "outputs": [], + "source": [ + "train_df.head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YPuHgx3BWBOg" + }, + "source": [ + "## Training an Model\n", + "\n", + "*Note: We could model this task also as a regression, see [Text classification with TF-Hub](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub.ipynb).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "23U30yEkVq4w" + }, + "outputs": [], + "source": [ + "class MyModel(tf.keras.Model):\n", + " def __init__(self, hub_url):\n", + " super().__init__()\n", + " self.hub_url = hub_url\n", + " self.embed = hub.load(self.hub_url).signatures['default']\n", + " self.sequential = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(500),\n", + " tf.keras.layers.Dense(100),\n", + " tf.keras.layers.Dense(5),\n", + " ])\n", + "\n", + " def call(self, inputs):\n", + " phrases = inputs['Phrase'][:,0]\n", + " embedding = 5*self.embed(phrases)['default']\n", + " return self.sequential(embedding)\n", + "\n", + " def get_config(self):\n", + " return {\"hub_url\":self.hub_url}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JE--GDMM2tSp" + }, + "outputs": [], + "source": [ + "model = MyModel(\"https://tfhub.dev/google/nnlm-en-dim128/1\")\n", + "model.compile(\n", + " loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=tf.optimizers.Adam(), \n", + " metrics = [tf.keras.metrics.SparseCategoricalAccuracy(name=\"accuracy\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SRr-lvhstiNw" + }, + "outputs": [], + "source": [ + "history = model.fit(x=dict(train_df), y=train_df['Sentiment'],\n", + " validation_data=(dict(validation_df), validation_df['Sentiment']),\n", + " epochs = 25)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s8j7YTRSe7Pj" + }, + "source": [ + "# Prediction\n", + "\n", + "Run predictions for the validation set and training set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iGqVNSl87bgN" + }, + "outputs": [], + "source": [ + "plt.plot(history.history['accuracy'])\n", + "plt.plot(history.history['val_accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zbLg5LzGwAfC" + }, + "outputs": [], + "source": [ + "train_eval_result = model.evaluate(dict(train_df), train_df['Sentiment'])\n", + "validation_eval_result = model.evaluate(dict(validation_df), validation_df['Sentiment'])\n", + "\n", + "print(f\"Training set accuracy: {train_eval_result[1]}\")\n", + "print(f\"Validation set accuracy: {validation_eval_result[1]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DR2IsTF5vuAX" + }, + "source": [ + "## Confusion matrix\n", + "\n", + "Another very interesting statistic, especially for multiclass problems, is the [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix). The confusion matrix allows visualization of the proportion of correctly and incorrectly labelled examples. We can easily see how much our classifier is biased and whether the distribution of labels makes sense. Ideally the largest fraction of predictions should be distributed along the diagonal." 
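Before plotting it, here is a tiny sketch with made-up labels of the row normalization used in the next cells: each row is divided by its total, so cell (i, j) reads as the fraction of examples of true class i that were predicted as class j.

```python
# Sketch with invented labels, purely to illustrate the normalization.
import tensorflow as tf

y_true = [0, 0, 1, 2, 2, 2]
y_pred = [0, 1, 1, 2, 2, 0]

cm = tf.math.confusion_matrix(y_true, y_pred)
cm_normalized = cm / tf.reduce_sum(cm, axis=1, keepdims=True)
print(cm_normalized.numpy())
```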
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yKUnJFYY8bO_" + }, + "outputs": [], + "source": [ + "predictions = model.predict(dict(validation_df))\n", + "predictions = tf.argmax(predictions, axis=-1)\n", + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fjAs8W_Z9BvP" + }, + "outputs": [], + "source": [ + "cm = tf.math.confusion_matrix(validation_df['Sentiment'], predictions)\n", + "cm = cm/cm.numpy().sum(axis=1)[:, tf.newaxis]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nT71CtArpsKz" + }, + "outputs": [], + "source": [ + "sns.heatmap(\n", + " cm, annot=True,\n", + " xticklabels=SENTIMENT_LABELS,\n", + " yticklabels=SENTIMENT_LABELS)\n", + "plt.xlabel(\"Predicted\")\n", + "plt.ylabel(\"True\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pic7o2m04weY" + }, + "source": [ + "We can easily submit the predictions back to Kaggle by pasting the following code to a code cell and executing it:\n", + "\n", + "``` python\n", + "test_predictions = model.predict(dict(test_df))\n", + "test_predictions = np.argmax(test_predictions, axis=-1)\n", + "\n", + "result_df = test_df.copy()\n", + "\n", + "result_df[\"Predictions\"] = test_predictions\n", + "\n", + "result_df.to_csv(\n", + " \"predictions.csv\",\n", + " columns=[\"Predictions\"],\n", + " header=[\"Sentiment\"])\n", + "kaggle.api.competition_submit(\"predictions.csv\", \"Submitted from Colab\",\n", + " \"sentiment-analysis-on-movie-reviews\")\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "50BLu-JX_dlm" + }, + "source": [ + "After submitting, [check the leaderboard](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/leaderboard) to see how you did." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "text_classification_with_tf_hub_on_kaggle.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/text_cookbook.md b/site/en/hub/tutorials/text_cookbook.md new file mode 100644 index 00000000000..dee9c1cf466 --- /dev/null +++ b/site/en/hub/tutorials/text_cookbook.md @@ -0,0 +1,101 @@ +# Text Cookbook + +This page lists a set of known guides and tools solving problems in the text +domain with TensorFlow Hub. It is a starting place for anybody who wants to +solve typical ML problems using pre-trained ML components rather than starting +from scratch. + +## Classification + +When we want to predict a class for a given example, for example **sentiment**, +**toxicity**, **article category**, or any other characteristic. + +![Text Classification Graphic](https://www.gstatic.com/aihub/tfhub/universal-sentence-encoder/example-classification.png) + +The tutorials below are solving the same task from different perspectives and +using different tools. + +### Keras + +[Text classification with Keras](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub) - +example for building an IMDB sentiment classifier with Keras and TensorFlow +Datasets. + +### Estimator + +[Text classification](https://github.com/tensorflow/docs/blob/master/g3doc/en/hub/tutorials/text_classification_with_tf_hub.ipynb) - +example for building an IMDB sentiment classifier with Estimator. Contains +multiple tips for improvement and a module comparison section. 
+ +### BERT +[Predicting Movie Review Sentiment with BERT on TF Hub](https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb) - +shows how to use a BERT module for classification. Includes use of `bert` +library for tokenization and preprocessing. + +### Kaggle + +[IMDB classification on Kaggle](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb) - +shows how to easily interact with a Kaggle competition from a Colab, including +downloading the data and submitting the results. + + | Estimator | Keras | TF2 | TF Datasets | BERT | Kaggle APIs +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ----------- +[Text classification](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | | +[Text classification with Keras](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub) | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | +[Predicting Movie Review Sentiment with BERT on TF Hub](https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | +[IMDB classification on Kaggle](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) + +### Bangla task with FastText embeddings +TensorFlow Hub does not currently offer a module in every language. The +following tutorial shows how to leverage TensorFlow Hub for fast experimentation +and modular ML development. + +[Bangla Article Classifier](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/bangla_article_classifier.ipynb) - +demonstrates how to create a reusable TensorFlow Hub text embedding, and use it +to train a Keras classifier for +[BARD Bangla Article dataset](https://github.com/tanvirfahim15/BARD-Bangla-Article-Classifier). + +## Semantic similarity + +When we want to find out which sentences correlate with each other in zero-shot +setup (no training examples). 
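As a rough, self-contained sketch (assuming the universal-sentence-encoder module on tfhub.dev; see the tutorials below for full walkthroughs), zero-shot similarity boils down to embedding the sentences and comparing the vectors:

```python
import numpy as np
import tensorflow_hub as hub

# Assumes the universal-sentence-encoder module; any sentence encoder from
# tfhub.dev with the same calling convention would work similarly.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
sentences = ["How old are you?", "What is your age?", "The weather is nice today."]
embeddings = embed(sentences).numpy()

# The embeddings are approximately unit length, so inner products behave like
# cosine similarity scores.
print(np.inner(embeddings, embeddings))
```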
+ +![Semantic Similarity Graphic](https://www.gstatic.com/aihub/tfhub/universal-sentence-encoder/example-similarity.png) + +### Basic + +[Semantic similarity](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb) - +shows how to use the sentence encoder module to compute sentence similarity. + +### Cross-lingual + +[Cross-lingual semantic similarity](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb) - +shows how to use one of the cross-lingual sentence encoders to compute sentence +similarity across languages. + +### Semantic retrieval + +[Semantic retrieval](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb) - +shows how to use Q/A sentence encoder to index a collection of documents for +retrieval based on semantic similarity. + +### SentencePiece input + +[Semantic similarity with universal encoder lite](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb) - +shows how to use sentence encoder modules that accept +[SentencePiece](https://github.com/google/sentencepiece) ids on input instead of +text. + +## Module creation +Instead of using only modules on [tfhub.dev](https://tfhub.dev), there are ways +to create own modules. This can be a useful tool for better ML codebase +modularity and for further sharing. + +### Wrapping existing pre-trained embeddings +[Text embedding module exporter](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings/export.py) - +a tool to wrap an existing pre-trained embedding into a module. Shows how to +include text pre-processing ops into the module. This allows to create a +sentence embedding module from token embeddings. + +[Text embedding module exporter v2](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings_v2/export_v2.py) - +same as above, but compatible with TensorFlow 2 and eager execution. diff --git a/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb b/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb new file mode 100644 index 00000000000..52fb3c0e4ab --- /dev/null +++ b/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8JSGdaDHc_f4" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z2_BHI6XdJ30" + }, + "source": [ + "# Text-to-Video retrieval with S3D MIL-NCE" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rm0K9ZTgfISB" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bC_xJPpQd-LO" + }, + "outputs": [], + "source": [ + "!pip install -q opencv-python\n", + "\n", + "import os\n", + "\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import cv2\n", + "from IPython import display\n", + "import math" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZxwaK-jf7qkW" + }, + "source": [ + "## Import TF-Hub model\n", + "\n", + "This tutorial demonstrates how to use the [S3D MIL-NCE model](https://tfhub.dev/deepmind/mil-nce/s3d/1) from TensorFlow Hub to do **text-to-video retrieval** to find the most similar videos for a given text query.\n", + "\n", + "The model has 2 signatures, one for generating *video embeddings* and one for generating *text embeddings*. We will use these embedding to find the nearest neighbors in the embedding space." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nwv4ZQ4qmak5" + }, + "outputs": [], + "source": [ + "# Load the model once from TF-Hub.\n", + "hub_handle = 'https://tfhub.dev/deepmind/mil-nce/s3d/1'\n", + "hub_model = hub.load(hub_handle)\n", + "\n", + "def generate_embeddings(model, input_frames, input_words):\n", + " \"\"\"Generate embeddings from the model from video frames and input words.\"\"\"\n", + " # Input_frames must be normalized in [0, 1] and of the shape Batch x T x H x W x 3\n", + " vision_output = model.signatures['video'](tf.constant(tf.cast(input_frames, dtype=tf.float32)))\n", + " text_output = model.signatures['text'](tf.constant(input_words))\n", + " return vision_output['video_embedding'], text_output['text_embedding']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EOZzu9ddekEj" + }, + "outputs": [], + "source": [ + "# @title Define video loading and visualization functions { display-mode: \"form\" }\n", + "\n", + "# Utilities to open video files using CV2\n", + "def crop_center_square(frame):\n", + " y, x = frame.shape[0:2]\n", + " min_dim = min(y, x)\n", + " start_x = (x // 2) - (min_dim // 2)\n", + " start_y = (y // 2) - (min_dim // 2)\n", + " return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]\n", + "\n", + "\n", + "def load_video(video_url, max_frames=32, resize=(224, 224)):\n", + " path = tf.keras.utils.get_file(os.path.basename(video_url)[-128:], video_url)\n", + " cap = cv2.VideoCapture(path)\n", + " frames = []\n", + " try:\n", + " while True:\n", + " ret, frame = cap.read()\n", + " if not ret:\n", + " break\n", + " frame = crop_center_square(frame)\n", + " frame = cv2.resize(frame, resize)\n", + " frame = frame[:, :, [2, 1, 0]]\n", + " frames.append(frame)\n", + "\n", + " if len(frames) == max_frames:\n", + " break\n", + " finally:\n", + " cap.release()\n", + " frames = np.array(frames)\n", + " if len(frames) < max_frames:\n", + " n_repeat = int(math.ceil(max_frames / float(len(frames))))\n", + " frames = frames.repeat(n_repeat, axis=0)\n", + " frames = frames[:max_frames]\n", + " return frames / 255.0\n", + "\n", + "def display_video(urls):\n", + " html = ''\n", + " html += ''\n", + " for url in urls:\n", + " html += ''\n", + " html += '
    Video 1Video 2Video 3
    '\n", + " html += ''.format(url)\n", + " html += '
    '\n", + " return display.HTML(html)\n", + "\n", + "def display_query_and_results_video(query, urls, scores):\n", + " \"\"\"Display a text query and the top result videos and scores.\"\"\"\n", + " sorted_ix = np.argsort(-scores)\n", + " html = ''\n", + " html += '
<h2>Input query: <i>{}</i></h2>'.format(query)\n",
+ "  html += 'Results:'\n",
+ "  html += '<table>'\n",
+ "  html += '<tr><th>Rank #1, Score:{:.2f}</th>'.format(scores[sorted_ix[0]])\n",
+ "  html += '<th>Rank #2, Score:{:.2f}</th>'.format(scores[sorted_ix[1]])\n",
+ "  html += '<th>Rank #3, Score:{:.2f}</th></tr><tr>'.format(scores[sorted_ix[2]])\n",
+ "  for i, idx in enumerate(sorted_ix):\n",
+ "    url = urls[sorted_ix[i]]\n",
+ "    html += '<td><img src=\"{}\"></td>'.format(url)\n",
+ "  html += '</tr></table>
    '\n", + " return html\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ime5V4kDewh8" + }, + "outputs": [], + "source": [ + "# @title Load example videos and define text queries { display-mode: \"form\" }\n", + "\n", + "video_1_url = 'https://upload.wikimedia.org/wikipedia/commons/b/b0/YosriAirTerjun.gif' # @param {type:\"string\"}\n", + "video_2_url = 'https://upload.wikimedia.org/wikipedia/commons/e/e6/Guitar_solo_gif.gif' # @param {type:\"string\"}\n", + "video_3_url = 'https://upload.wikimedia.org/wikipedia/commons/3/30/2009-08-16-autodrift-by-RalfR-gif-by-wau.gif' # @param {type:\"string\"}\n", + "\n", + "video_1 = load_video(video_1_url)\n", + "video_2 = load_video(video_2_url)\n", + "video_3 = load_video(video_3_url)\n", + "all_videos = [video_1, video_2, video_3]\n", + "\n", + "query_1_video = 'waterfall' # @param {type:\"string\"}\n", + "query_2_video = 'playing guitar' # @param {type:\"string\"}\n", + "query_3_video = 'car drifting' # @param {type:\"string\"}\n", + "all_queries_video = [query_1_video, query_2_video, query_3_video]\n", + "all_videos_urls = [video_1_url, video_2_url, video_3_url]\n", + "display_video(all_videos_urls)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NCLKv_L_8Anc" + }, + "source": [ + "## Demonstrate text to video retrieval\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9oX8ItFUjybi" + }, + "outputs": [], + "source": [ + "# Prepare video inputs.\n", + "videos_np = np.stack(all_videos, axis=0)\n", + "\n", + "# Prepare text input.\n", + "words_np = np.array(all_queries_video)\n", + "\n", + "# Generate the video and text embeddings.\n", + "video_embd, text_embd = generate_embeddings(hub_model, videos_np, words_np)\n", + "\n", + "# Scores between video and text is computed by dot products.\n", + "all_scores = np.dot(text_embd, tf.transpose(video_embd))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d4AwYmODmE9Y" + }, + "outputs": [], + "source": [ + "# Display results.\n", + "html = ''\n", + "for i, words in enumerate(words_np):\n", + " html += display_query_and_results_video(words, all_videos_urls, all_scores[i, :])\n", + " html += '
    '\n", + "display.HTML(html)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "text_to_video_retrieval_with_s3d_milnce.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb b/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb new file mode 100644 index 00000000000..3a0cb09113e --- /dev/null +++ b/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oXlcl8lqBgAD" + }, + "source": [ + "# Fast Style Transfer for Arbitrary Styles\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YeeuYzbZcJzs" + }, + "source": [ + "Based on the model code in [magenta](https://github.com/tensorflow/magenta/tree/master/magenta/models/arbitrary_image_stylization) and the publication:\n", + "\n", + "[Exploring the structure of a real-time, arbitrary neural artistic stylization\n", + "network](https://arxiv.org/abs/1705.06830).\n", + "*Golnaz Ghiasi, Honglak Lee,\n", + "Manjunath Kudlur, Vincent Dumoulin, Jonathon Shlens*,\n", + "Proceedings of the British Machine Vision Conference (BMVC), 2017.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TaM8BVxrCA2E" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J65jog2ncJzt" + }, + "source": [ + "Let's start with importing TF2 and all relevant dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v-KXRY5XBu2u" + }, + "outputs": [], + "source": [ + "import functools\n", + "import os\n", + "\n", + "from matplotlib import gridspec\n", + "import matplotlib.pylab as plt\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "print(\"TF Version: \", tf.__version__)\n", + "print(\"TF Hub version: \", hub.__version__)\n", + "print(\"Eager mode enabled: \", tf.executing_eagerly())\n", + "print(\"GPU available: \", tf.config.list_physical_devices('GPU'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsoDv_9geoZn" + }, + "outputs": [], + "source": [ + "# @title Define image loading and visualization functions { display-mode: \"form\" }\n", + "\n", + "def crop_center(image):\n", + " \"\"\"Returns a cropped square image.\"\"\"\n", + " shape = image.shape\n", + " new_shape = min(shape[1], shape[2])\n", + " offset_y = max(shape[1] - shape[2], 0) // 2\n", + " offset_x = max(shape[2] - shape[1], 0) // 2\n", + " image = tf.image.crop_to_bounding_box(\n", + " image, offset_y, offset_x, new_shape, new_shape)\n", + " return image\n", + "\n", + "@functools.lru_cache(maxsize=None)\n", + "def load_image(image_url, image_size=(256, 256), preserve_aspect_ratio=True):\n", + " \"\"\"Loads and preprocesses images.\"\"\"\n", + " # Cache image file locally.\n", + " image_path = tf.keras.utils.get_file(os.path.basename(image_url)[-128:], image_url)\n", + " # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].\n", + " img = tf.io.decode_image(\n", + " tf.io.read_file(image_path),\n", + " channels=3, dtype=tf.float32)[tf.newaxis, ...]\n", + " img = crop_center(img)\n", + " img = tf.image.resize(img, image_size, preserve_aspect_ratio=True)\n", + " return img\n", + "\n", + "def show_n(images, titles=('',)):\n", + " n = len(images)\n", + " image_sizes = [image.shape[1] for image in images]\n", + " w = (image_sizes[0] * 6) // 320\n", + " plt.figure(figsize=(w * n, w))\n", + " gs = gridspec.GridSpec(1, n, width_ratios=image_sizes)\n", + " for i in range(n):\n", + " plt.subplot(gs[i])\n", + " plt.imshow(images[i][0], aspect='equal')\n", + " plt.axis('off')\n", + " plt.title(titles[i] if len(titles) > i else '')\n", + " plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8etHh05-CJHc" + }, + "source": [ + "Let's get as well some images to play with." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dRc0vat3Alzo" + }, + "outputs": [], + "source": [ + "# @title Load example images { display-mode: \"form\" }\n", + "\n", + "content_image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/f/fd/Golden_Gate_Bridge_from_Battery_Spencer.jpg/640px-Golden_Gate_Bridge_from_Battery_Spencer.jpg' # @param {type:\"string\"}\n", + "style_image_url = 'https://upload.wikimedia.org/wikipedia/commons/0/0a/The_Great_Wave_off_Kanagawa.jpg' # @param {type:\"string\"}\n", + "output_image_size = 384 # @param {type:\"integer\"}\n", + "\n", + "# The content image size can be arbitrary.\n", + "content_img_size = (output_image_size, output_image_size)\n", + "# The style prediction model was trained with image size 256 and it's the \n", + "# recommended image size for the style image (though, other sizes work as \n", + "# well but will lead to different results).\n", + "style_img_size = (256, 256) # Recommended to keep it at 256.\n", + "\n", + "content_image = load_image(content_image_url, content_img_size)\n", + "style_image = load_image(style_image_url, style_img_size)\n", + "style_image = tf.nn.avg_pool(style_image, ksize=[3,3], strides=[1,1], padding='SAME')\n", + "show_n([content_image, style_image], ['Content image', 'Style image'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yL2Bn5ThR1nY" + }, + "source": [ + "## Import TF Hub module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "467AVDSuzBPc" + }, + "outputs": [], + "source": [ + "# Load TF Hub module.\n", + "\n", + "hub_handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'\n", + "hub_module = hub.load(hub_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uAR70_3wLEDB" + }, + "source": [ + "The signature of this hub module for image stylization is:\n", + "```\n", + "outputs = hub_module(content_image, style_image)\n", + "stylized_image = outputs[0]\n", + "```\n", + "Where `content_image`, `style_image`, and `stylized_image` are expected to be 4-D Tensors with shapes `[batch_size, image_height, image_width, 3]`.\n", + "\n", + "In the current example we provide only single images and therefore the batch dimension is 1, but one can use the same module to process more images at the same time.\n", + "\n", + "The input and output values of the images should be in the range [0, 1].\n", + "\n", + "The shapes of content and style image don't have to match. Output image shape\n", + "is the same as the content image shape." 
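As a small aside on the batch dimension mentioned above, a batched call might look like the following sketch; it assumes both inputs carry the same batch size and uses random tensors purely as stand-ins for real images with values in [0, 1].

```python
import tensorflow as tf
import tensorflow_hub as hub

hub_module = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')

# Four content images and four style images, batched along the first axis.
batch_content = tf.random.uniform((4, 384, 384, 3))
batch_style = tf.random.uniform((4, 256, 256, 3))

outputs = hub_module(tf.constant(batch_content), tf.constant(batch_style))
print(outputs[0].shape)  # expected: (4, 384, 384, 3), same as the content batch
```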
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qEhYJno1R7rP" + }, + "source": [ + "## Demonstrate image stylization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lnAv-F3O9fLV" + }, + "outputs": [], + "source": [ + "# Stylize content image with given style image.\n", + "# This is pretty fast within a few milliseconds on a GPU.\n", + "\n", + "outputs = hub_module(tf.constant(content_image), tf.constant(style_image))\n", + "stylized_image = outputs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OEAPEdq698gs" + }, + "outputs": [], + "source": [ + "# Visualize input images and the generated stylized image.\n", + "\n", + "show_n([content_image, style_image, stylized_image], titles=['Original content image', 'Style image', 'Stylized image'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v-gYvjTWK-lx" + }, + "source": [ + "## Let's try it on more images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WSMaY0YBNfkK" + }, + "outputs": [], + "source": [ + "# @title To Run: Load more images { display-mode: \"form\" }\n", + "\n", + "content_urls = dict(\n", + " sea_turtle='https://upload.wikimedia.org/wikipedia/commons/d/d7/Green_Sea_Turtle_grazing_seagrass.jpg',\n", + " tuebingen='https://upload.wikimedia.org/wikipedia/commons/0/00/Tuebingen_Neckarfront.jpg',\n", + " grace_hopper='https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg',\n", + " )\n", + "style_urls = dict(\n", + " kanagawa_great_wave='https://upload.wikimedia.org/wikipedia/commons/0/0a/The_Great_Wave_off_Kanagawa.jpg',\n", + " kandinsky_composition_7='https://upload.wikimedia.org/wikipedia/commons/b/b4/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg',\n", + " hubble_pillars_of_creation='https://upload.wikimedia.org/wikipedia/commons/6/68/Pillars_of_creation_2014_HST_WFC3-UVIS_full-res_denoised.jpg',\n", + " van_gogh_starry_night='https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg/1024px-Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg',\n", + " turner_nantes='https://upload.wikimedia.org/wikipedia/commons/b/b7/JMW_Turner_-_Nantes_from_the_Ile_Feydeau.jpg',\n", + " munch_scream='https://upload.wikimedia.org/wikipedia/commons/c/c5/Edvard_Munch%2C_1893%2C_The_Scream%2C_oil%2C_tempera_and_pastel_on_cardboard%2C_91_x_73_cm%2C_National_Gallery_of_Norway.jpg',\n", + " picasso_demoiselles_avignon='https://upload.wikimedia.org/wikipedia/en/4/4c/Les_Demoiselles_d%27Avignon.jpg',\n", + " picasso_violin='https://upload.wikimedia.org/wikipedia/en/3/3c/Pablo_Picasso%2C_1911-12%2C_Violon_%28Violin%29%2C_oil_on_canvas%2C_Kr%C3%B6ller-M%C3%BCller_Museum%2C_Otterlo%2C_Netherlands.jpg',\n", + " picasso_bottle_of_rum='https://upload.wikimedia.org/wikipedia/en/7/7f/Pablo_Picasso%2C_1911%2C_Still_Life_with_a_Bottle_of_Rum%2C_oil_on_canvas%2C_61.3_x_50.5_cm%2C_Metropolitan_Museum_of_Art%2C_New_York.jpg',\n", + " fire='https://upload.wikimedia.org/wikipedia/commons/3/36/Large_bonfire.jpg',\n", + " derkovits_woman_head='https://upload.wikimedia.org/wikipedia/commons/0/0d/Derkovits_Gyula_Woman_head_1922.jpg',\n", + " amadeo_style_life='https://upload.wikimedia.org/wikipedia/commons/8/8e/Untitled_%28Still_life%29_%281913%29_-_Amadeo_Souza-Cardoso_%281887-1918%29_%2817385824283%29.jpg',\n", + " derkovtis_talig='https://upload.wikimedia.org/wikipedia/commons/3/37/Derkovits_Gyula_Talig%C3%A1s_1920.jpg',\n", + " 
amadeo_cardoso='https://upload.wikimedia.org/wikipedia/commons/7/7d/Amadeo_de_Souza-Cardoso%2C_1915_-_Landscape_with_black_figure.jpg'\n", + ")\n", + "\n", + "content_image_size = 384\n", + "style_image_size = 256\n", + "content_images = {k: load_image(v, (content_image_size, content_image_size)) for k, v in content_urls.items()}\n", + "style_images = {k: load_image(v, (style_image_size, style_image_size)) for k, v in style_urls.items()}\n", + "style_images = {k: tf.nn.avg_pool(style_image, ksize=[3,3], strides=[1,1], padding='SAME') for k, style_image in style_images.items()}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dqB6aNTLNVkK" + }, + "outputs": [], + "source": [ + "#@title Specify the main content image and the style you want to use. { display-mode: \"form\" }\n", + "\n", + "content_name = 'sea_turtle' # @param ['sea_turtle', 'tuebingen', 'grace_hopper']\n", + "style_name = 'munch_scream' # @param ['kanagawa_great_wave', 'kandinsky_composition_7', 'hubble_pillars_of_creation', 'van_gogh_starry_night', 'turner_nantes', 'munch_scream', 'picasso_demoiselles_avignon', 'picasso_violin', 'picasso_bottle_of_rum', 'fire', 'derkovits_woman_head', 'amadeo_style_life', 'derkovtis_talig', 'amadeo_cardoso']\n", + "\n", + "stylized_image = hub_module(tf.constant(content_images[content_name]),\n", + " tf.constant(style_images[style_name]))[0]\n", + "\n", + "show_n([content_images[content_name], style_images[style_name], stylized_image],\n", + " titles=['Original content image', 'Style image', 'Stylized image'])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "tf2_arbitrary_image_stylization.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_image_retraining.ipynb b/site/en/hub/tutorials/tf2_image_retraining.ipynb new file mode 100644 index 00000000000..0266f4683c1 --- /dev/null +++ b/site/en/hub/tutorials/tf2_image_retraining.ipynb @@ -0,0 +1,605 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oYM61xrTsP5d" + }, + "source": [ + "# Retraining an Image Classifier\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L1otmJgmbahf" + }, + "source": [ + "## Introduction\n", + "\n", + "Image classification models have millions of parameters. Training them from\n", + "scratch requires a lot of labeled training data and a lot of computing power. Transfer learning is a technique that shortcuts much of this by taking a piece of a model that has already been trained on a related task and reusing it in a new model.\n", + "\n", + "This Colab demonstrates how to build a Keras model for classifying five species of flowers by using a pre-trained TF2 SavedModel from TensorFlow Hub for image feature extraction, trained on the much larger and more general ImageNet dataset. Optionally, the feature extractor can be trained (\"fine-tuned\") alongside the newly added classifier.\n", + "\n", + "### Looking for a tool instead?\n", + "\n", + "This is a TensorFlow coding tutorial. If you want a tool that just builds the TensorFlow or TFLite model for, take a look at the [make_image_classifier](https://github.com/tensorflow/hub/tree/master/tensorflow_hub/tools/make_image_classifier) command-line tool that gets [installed](https://www.tensorflow.org/hub/installation) by the PIP package `tensorflow-hub[make_image_classifier]`, or at [this](https://colab.sandbox.google.com/github/tensorflow/examples/blob/master/tensorflow_examples/lite/model_maker/demo/image_classification.ipynb) TFLite colab.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bL54LWCHt5q5" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dlauq-4FWGZM" + }, + "outputs": [], + "source": [ + "import itertools\n", + "import os\n", + "\n", + "import matplotlib.pylab as plt\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "print(\"TF version:\", tf.__version__)\n", + "print(\"Hub version:\", hub.__version__)\n", + "print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mmaHHH7Pvmth" + }, + "source": [ + "## Select the TF2 SavedModel module to use\n", + "\n", + "For starters, use [https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4](https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4). The same URL can be used in code to identify the SavedModel and in your browser to show its documentation. (Note that models in TF1 Hub format won't work here.)\n", + "\n", + "You can find more TF2 models that generate image feature vectors [here](https://tfhub.dev/s?module-type=image-feature-vector&tf-version=tf2).\n", + "\n", + "There are multiple possible models to try. All you need to do is select a different one on the cell below and follow up with the notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FlsEcKVeuCnf" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "model_name = \"efficientnetv2-xl-21k\" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']\n", + "\n", + "model_handle_map = {\n", + " \"efficientnetv2-s\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/feature_vector/2\",\n", + " \"efficientnetv2-m\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/feature_vector/2\",\n", + " \"efficientnetv2-l\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/feature_vector/2\",\n", + " \"efficientnetv2-s-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2\",\n", + " \"efficientnetv2-m-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/feature_vector/2\",\n", + " \"efficientnetv2-l-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2\",\n", + " \"efficientnetv2-xl-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/feature_vector/2\",\n", + " \"efficientnetv2-b0-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/feature_vector/2\",\n", + " \"efficientnetv2-b1-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/feature_vector/2\",\n", + " \"efficientnetv2-b2-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/feature_vector/2\",\n", + " \"efficientnetv2-b3-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/feature_vector/2\",\n", + " \"efficientnetv2-s-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2\",\n", + " \"efficientnetv2-m-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2\",\n", + " \"efficientnetv2-l-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2\",\n", + " \"efficientnetv2-xl-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2\",\n", + " \"efficientnetv2-b0-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/feature_vector/2\",\n", + " \"efficientnetv2-b1-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/feature_vector/2\",\n", + " \"efficientnetv2-b2-21k-ft1k\": 
\"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/feature_vector/2\",\n", + " \"efficientnetv2-b3-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/feature_vector/2\",\n", + " \"efficientnetv2-b0\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/feature_vector/2\",\n", + " \"efficientnetv2-b1\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/feature_vector/2\",\n", + " \"efficientnetv2-b2\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/feature_vector/2\",\n", + " \"efficientnetv2-b3\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/feature_vector/2\",\n", + " \"efficientnet_b0\": \"https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1\",\n", + " \"efficientnet_b1\": \"https://tfhub.dev/tensorflow/efficientnet/b1/feature-vector/1\",\n", + " \"efficientnet_b2\": \"https://tfhub.dev/tensorflow/efficientnet/b2/feature-vector/1\",\n", + " \"efficientnet_b3\": \"https://tfhub.dev/tensorflow/efficientnet/b3/feature-vector/1\",\n", + " \"efficientnet_b4\": \"https://tfhub.dev/tensorflow/efficientnet/b4/feature-vector/1\",\n", + " \"efficientnet_b5\": \"https://tfhub.dev/tensorflow/efficientnet/b5/feature-vector/1\",\n", + " \"efficientnet_b6\": \"https://tfhub.dev/tensorflow/efficientnet/b6/feature-vector/1\",\n", + " \"efficientnet_b7\": \"https://tfhub.dev/tensorflow/efficientnet/b7/feature-vector/1\",\n", + " \"bit_s-r50x1\": \"https://tfhub.dev/google/bit/s-r50x1/1\",\n", + " \"inception_v3\": \"https://tfhub.dev/google/imagenet/inception_v3/feature-vector/4\",\n", + " \"inception_resnet_v2\": \"https://tfhub.dev/google/imagenet/inception_resnet_v2/feature-vector/4\",\n", + " \"resnet_v1_50\": \"https://tfhub.dev/google/imagenet/resnet_v1_50/feature-vector/4\",\n", + " \"resnet_v1_101\": \"https://tfhub.dev/google/imagenet/resnet_v1_101/feature-vector/4\",\n", + " \"resnet_v1_152\": \"https://tfhub.dev/google/imagenet/resnet_v1_152/feature-vector/4\",\n", + " \"resnet_v2_50\": \"https://tfhub.dev/google/imagenet/resnet_v2_50/feature-vector/4\",\n", + " \"resnet_v2_101\": \"https://tfhub.dev/google/imagenet/resnet_v2_101/feature-vector/4\",\n", + " \"resnet_v2_152\": \"https://tfhub.dev/google/imagenet/resnet_v2_152/feature-vector/4\",\n", + " \"nasnet_large\": \"https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/4\",\n", + " \"nasnet_mobile\": \"https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4\",\n", + " \"pnasnet_large\": \"https://tfhub.dev/google/imagenet/pnasnet_large/feature_vector/4\",\n", + " \"mobilenet_v2_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4\",\n", + " \"mobilenet_v2_130_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/feature_vector/4\",\n", + " \"mobilenet_v2_140_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4\",\n", + " \"mobilenet_v3_small_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/feature_vector/5\",\n", + " \"mobilenet_v3_small_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/feature_vector/5\",\n", + " \"mobilenet_v3_large_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5\",\n", + " \"mobilenet_v3_large_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/feature_vector/5\",\n", + "}\n", + "\n", + "model_image_size_map = {\n", + " \"efficientnetv2-s\": 384,\n", + " \"efficientnetv2-m\": 480,\n", + " 
\"efficientnetv2-l\": 480,\n", + " \"efficientnetv2-b0\": 224,\n", + " \"efficientnetv2-b1\": 240,\n", + " \"efficientnetv2-b2\": 260,\n", + " \"efficientnetv2-b3\": 300,\n", + " \"efficientnetv2-s-21k\": 384,\n", + " \"efficientnetv2-m-21k\": 480,\n", + " \"efficientnetv2-l-21k\": 480,\n", + " \"efficientnetv2-xl-21k\": 512,\n", + " \"efficientnetv2-b0-21k\": 224,\n", + " \"efficientnetv2-b1-21k\": 240,\n", + " \"efficientnetv2-b2-21k\": 260,\n", + " \"efficientnetv2-b3-21k\": 300,\n", + " \"efficientnetv2-s-21k-ft1k\": 384,\n", + " \"efficientnetv2-m-21k-ft1k\": 480,\n", + " \"efficientnetv2-l-21k-ft1k\": 480,\n", + " \"efficientnetv2-xl-21k-ft1k\": 512,\n", + " \"efficientnetv2-b0-21k-ft1k\": 224,\n", + " \"efficientnetv2-b1-21k-ft1k\": 240,\n", + " \"efficientnetv2-b2-21k-ft1k\": 260,\n", + " \"efficientnetv2-b3-21k-ft1k\": 300, \n", + " \"efficientnet_b0\": 224,\n", + " \"efficientnet_b1\": 240,\n", + " \"efficientnet_b2\": 260,\n", + " \"efficientnet_b3\": 300,\n", + " \"efficientnet_b4\": 380,\n", + " \"efficientnet_b5\": 456,\n", + " \"efficientnet_b6\": 528,\n", + " \"efficientnet_b7\": 600,\n", + " \"inception_v3\": 299,\n", + " \"inception_resnet_v2\": 299,\n", + " \"nasnet_large\": 331,\n", + " \"pnasnet_large\": 331,\n", + "}\n", + "\n", + "model_handle = model_handle_map.get(model_name)\n", + "pixels = model_image_size_map.get(model_name, 224)\n", + "\n", + "print(f\"Selected model: {model_name} : {model_handle}\")\n", + "\n", + "IMAGE_SIZE = (pixels, pixels)\n", + "print(f\"Input size {IMAGE_SIZE}\")\n", + "\n", + "BATCH_SIZE = 16#@param {type:\"integer\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yTY8qzyYv3vl" + }, + "source": [ + "## Set up the Flowers dataset\n", + "\n", + "Inputs are suitably resized for the selected module. Dataset augmentation (i.e., random distortions of an image each time it is read) improves training, esp. when fine-tuning." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WBtFK1hO8KsO" + }, + "outputs": [], + "source": [ + "data_dir = tf.keras.utils.get_file(\n", + " 'flower_photos',\n", + " 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',\n", + " untar=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "umB5tswsfTEQ" + }, + "outputs": [], + "source": [ + "def build_dataset(subset):\n", + " return tf.keras.preprocessing.image_dataset_from_directory(\n", + " data_dir,\n", + " validation_split=.20,\n", + " subset=subset,\n", + " label_mode=\"categorical\",\n", + " # Seed needs to provided when using validation_split and shuffle = True.\n", + " # A fixed seed is used so that the validation set is stable across runs.\n", + " seed=123,\n", + " image_size=IMAGE_SIZE,\n", + " batch_size=1)\n", + "\n", + "train_ds = build_dataset(\"training\")\n", + "class_names = tuple(train_ds.class_names)\n", + "train_size = train_ds.cardinality().numpy()\n", + "train_ds = train_ds.unbatch().batch(BATCH_SIZE)\n", + "train_ds = train_ds.repeat()\n", + "\n", + "normalization_layer = tf.keras.layers.Rescaling(1. 
/ 255)\n", + "preprocessing_model = tf.keras.Sequential([normalization_layer])\n", + "do_data_augmentation = False #@param {type:\"boolean\"}\n", + "if do_data_augmentation:\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomRotation(40))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomTranslation(0, 0.2))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomTranslation(0.2, 0))\n", + " # Like the old tf.keras.preprocessing.image.ImageDataGenerator(),\n", + " # image sizes are fixed when reading, and then a random zoom is applied.\n", + " # If all training inputs are larger than image_size, one could also use\n", + " # RandomCrop with a batch size of 1 and rebatch later.\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomZoom(0.2, 0.2))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomFlip(mode=\"horizontal\"))\n", + "train_ds = train_ds.map(lambda images, labels:\n", + " (preprocessing_model(images), labels))\n", + "\n", + "val_ds = build_dataset(\"validation\")\n", + "valid_size = val_ds.cardinality().numpy()\n", + "val_ds = val_ds.unbatch().batch(BATCH_SIZE)\n", + "val_ds = val_ds.map(lambda images, labels:\n", + " (normalization_layer(images), labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FS_gVStowW3G" + }, + "source": [ + "## Defining the model\n", + "\n", + "All it takes is to put a linear classifier on top of the `feature_extractor_layer` with the Hub module.\n", + "\n", + "For speed, we start out with a non-trainable `feature_extractor_layer`, but you can also enable fine-tuning for greater accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RaJW3XrPyFiF" + }, + "outputs": [], + "source": [ + "do_fine_tuning = False #@param {type:\"boolean\"}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50FYNIb1dmJH" + }, + "outputs": [], + "source": [ + "print(\"Building model with\", model_handle)\n", + "model = tf.keras.Sequential([\n", + " # Explicitly define the input shape so the model can be properly\n", + " # loaded by the TFLiteConverter\n", + " tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)),\n", + " hub.KerasLayer(model_handle, trainable=do_fine_tuning),\n", + " tf.keras.layers.Dropout(rate=0.2),\n", + " tf.keras.layers.Dense(len(class_names),\n", + " kernel_regularizer=tf.keras.regularizers.l2(0.0001))\n", + "])\n", + "model.build((None,)+IMAGE_SIZE+(3,))\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u2e5WupIw2N2" + }, + "source": [ + "## Training the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9f3yBUvkd_VJ" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9), \n", + " loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w_YKX2Qnfg6x" + }, + "outputs": [], + "source": [ + "steps_per_epoch = train_size // BATCH_SIZE\n", + "validation_steps = valid_size // BATCH_SIZE\n", + "hist = model.fit(\n", + " train_ds,\n", + " epochs=5, steps_per_epoch=steps_per_epoch,\n", + " validation_data=val_ds,\n", + " validation_steps=validation_steps).history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CYOw0fTO1W4x" + }, + "outputs": [], + "source": [ + 
"plt.figure()\n", + "plt.ylabel(\"Loss (training and validation)\")\n", + "plt.xlabel(\"Training Steps\")\n", + "plt.ylim([0,2])\n", + "plt.plot(hist[\"loss\"])\n", + "plt.plot(hist[\"val_loss\"])\n", + "\n", + "plt.figure()\n", + "plt.ylabel(\"Accuracy (training and validation)\")\n", + "plt.xlabel(\"Training Steps\")\n", + "plt.ylim([0,1])\n", + "plt.plot(hist[\"accuracy\"])\n", + "plt.plot(hist[\"val_accuracy\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jZ8DKKgeKv4-" + }, + "source": [ + "Try out the model on an image from the validation data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi1iCNB9K1Ai" + }, + "outputs": [], + "source": [ + "x, y = next(iter(val_ds))\n", + "image = x[0, :, :, :]\n", + "true_index = np.argmax(y[0])\n", + "plt.imshow(image)\n", + "plt.axis('off')\n", + "plt.show()\n", + "\n", + "# Expand the validation image to (1, 224, 224, 3) before predicting the label\n", + "prediction_scores = model.predict(np.expand_dims(image, axis=0))\n", + "predicted_index = np.argmax(prediction_scores)\n", + "print(\"True label: \" + class_names[true_index])\n", + "print(\"Predicted label: \" + class_names[predicted_index])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YCsAsQM1IRvA" + }, + "source": [ + "Finally, the trained model can be saved for deployment to TF Serving or TFLite (on mobile) as follows." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LGvTi69oIc2d" + }, + "outputs": [], + "source": [ + "saved_model_path = f\"/tmp/saved_flowers_model_{model_name}\"\n", + "tf.saved_model.save(model, saved_model_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QzW4oNRjILaq" + }, + "source": [ + "## Optional: Deployment to TensorFlow Lite\n", + "\n", + "[TensorFlow Lite](https://www.tensorflow.org/lite) lets you deploy TensorFlow models to mobile and IoT devices. The code below shows how to convert the trained model to TFLite and apply post-training tools from the [TensorFlow Model Optimization Toolkit](https://www.tensorflow.org/model_optimization). Finally, it runs it in the TFLite Interpreter to examine the resulting quality\n", + "\n", + " * Converting without optimization provides the same results as before (up to roundoff error).\n", + " * Converting with optimization without any data quantizes the model weights to 8 bits, but inference still uses floating-point computation for the neural network activations. This reduces model size almost by a factor of 4 and improves CPU latency on mobile devices.\n", + " * On top, computation of the neural network activations can be quantized to 8-bit integers as well if a small reference dataset is provided to calibrate the quantization range. On a mobile device, this accelerates inference further and makes it possible to run on accelerators like Edge TPU." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Va1Vo92fSyV6" + }, + "outputs": [], + "source": [ + "#@title Optimization settings\n", + "optimize_lite_model = False #@param {type:\"boolean\"}\n", + "#@markdown Setting a value greater than zero enables quantization of neural network activations. 
A few dozen is already a useful amount.\n", + "num_calibration_examples = 60 #@param {type:\"slider\", min:0, max:1000, step:1}\n", + "representative_dataset = None\n", + "if optimize_lite_model and num_calibration_examples:\n", + " # Use a bounded number of training examples without labels for calibration.\n", + " # TFLiteConverter expects a list of input tensors, each with batch size 1.\n", + " representative_dataset = lambda: itertools.islice(\n", + " ([image[None, ...]] for batch, _ in train_ds for image in batch),\n", + " num_calibration_examples)\n", + "\n", + "converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)\n", + "if optimize_lite_model:\n", + " converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + " if representative_dataset: # This is optional, see above.\n", + " converter.representative_dataset = representative_dataset\n", + "lite_model_content = converter.convert()\n", + "\n", + "with open(f\"/tmp/lite_flowers_model_{model_name}.tflite\", \"wb\") as f:\n", + " f.write(lite_model_content)\n", + "print(\"Wrote %sTFLite model of %d bytes.\" %\n", + " (\"optimized \" if optimize_lite_model else \"\", len(lite_model_content)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_wqEmD0xIqeG" + }, + "outputs": [], + "source": [ + "interpreter = tf.lite.Interpreter(model_content=lite_model_content)\n", + "# This little helper wraps the TFLite Interpreter as a numpy-to-numpy function.\n", + "def lite_model(images):\n", + " interpreter.allocate_tensors()\n", + " interpreter.set_tensor(interpreter.get_input_details()[0]['index'], images)\n", + " interpreter.invoke()\n", + " return interpreter.get_tensor(interpreter.get_output_details()[0]['index'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMMK-fZrKrk8" + }, + "outputs": [], + "source": [ + "#@markdown For rapid experimentation, start with a moderate number of examples.\n", + "num_eval_examples = 50 #@param {type:\"slider\", min:0, max:700}\n", + "eval_dataset = ((image, label) # TFLite expects batch size 1.\n", + " for batch in train_ds\n", + " for (image, label) in zip(*batch))\n", + "count = 0\n", + "count_lite_tf_agree = 0\n", + "count_lite_correct = 0\n", + "for image, label in eval_dataset:\n", + " probs_lite = lite_model(image[None, ...])[0]\n", + " probs_tf = model(image[None, ...]).numpy()[0]\n", + " y_lite = np.argmax(probs_lite)\n", + " y_tf = np.argmax(probs_tf)\n", + " y_true = np.argmax(label)\n", + " count +=1\n", + " if y_lite == y_tf: count_lite_tf_agree += 1\n", + " if y_lite == y_true: count_lite_correct += 1\n", + " if count >= num_eval_examples: break\n", + "print(\"TFLite model agrees with original model on %d of %d examples (%g%%).\" %\n", + " (count_lite_tf_agree, count, 100.0 * count_lite_tf_agree / count))\n", + "print(\"TFLite model is accurate on %d of %d examples (%g%%).\" %\n", + " (count_lite_correct, count, 100.0 * count_lite_correct / count))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "ScitaPqhKtuW" + ], + "name": "tf2_image_retraining.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_object_detection.ipynb b/site/en/hub/tutorials/tf2_object_detection.ipynb new file mode 100644 index 00000000000..d06ad401824 --- /dev/null +++ b/site/en/hub/tutorials/tf2_object_detection.ipynb @@ -0,0 +1,616 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "98rds-2OU-Rd" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "1c95xMGcU5_Z" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V1UUX8SUUiMO" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rOvvWAVTkMR7" + }, + "source": [ + "# TensorFlow Hub Object Detection Colab\n", + "\n", + "Welcome to the TensorFlow Hub Object Detection Colab! This notebook will take you through the steps of running an \"out-of-the-box\" object detection model on images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IRImnk_7WOq1" + }, + "source": [ + "### More models\n", + "[This](https://tfhub.dev/tensorflow/collections/object_detection/1) collection contains TF2 object detection models that have been trained on the COCO 2017 dataset. [Here](https://tfhub.dev/s?module-type=image-object-detection) you can find all object detection models that are currently hosted on [tfhub.dev](https://tfhub.dev/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vPs64QA1Zdov" + }, + "source": [ + "## Imports and Setup\n", + "\n", + "Let's start with the base imports." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xk4FU-jx9kc3" + }, + "outputs": [], + "source": [ + "# This Colab requires a recent numpy version.\n", + "!pip install numpy==1.24.3\n", + "!pip install protobuf==3.20.3\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yn5_uV1HLvaz" + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import io\n", + "import scipy.misc\n", + "import numpy as np\n", + "from six import BytesIO\n", + "from PIL import Image, ImageDraw, ImageFont\n", + "from six.moves.urllib.request import urlopen\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "tf.get_logger().setLevel('ERROR')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IogyryF2lFBL" + }, + "source": [ + "## Utilities\n", + "\n", + "Run the following cell to create some utils that will be needed later:\n", + "\n", + "- Helper method to load an image\n", + "- Map of Model Name to TF Hub handle\n", + "- List of tuples with Human Keypoints for the COCO 2017 dataset. This is needed for models with keypoints." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-y9R0Xllefec" + }, + "outputs": [], + "source": [ + "# @title Run this!!\n", + "\n", + "def load_image_into_numpy_array(path):\n", + " \"\"\"Load an image from file into a numpy array.\n", + "\n", + " Puts image into numpy array to feed into tensorflow graph.\n", + " Note that by convention we put it into a numpy array with shape\n", + " (height, width, channels), where channels=3 for RGB.\n", + "\n", + " Args:\n", + " path: the file path to the image\n", + "\n", + " Returns:\n", + " uint8 numpy array with shape (img_height, img_width, 3)\n", + " \"\"\"\n", + " image = None\n", + " if(path.startswith('http')):\n", + " response = urlopen(path)\n", + " image_data = response.read()\n", + " image_data = BytesIO(image_data)\n", + " image = Image.open(image_data)\n", + " else:\n", + " image_data = tf.io.gfile.GFile(path, 'rb').read()\n", + " image = Image.open(BytesIO(image_data))\n", + "\n", + " (im_width, im_height) = image.size\n", + " return np.array(image.getdata()).reshape(\n", + " (1, im_height, im_width, 3)).astype(np.uint8)\n", + "\n", + "\n", + "ALL_MODELS = {\n", + "'CenterNet HourGlass104 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512/1',\n", + "'CenterNet HourGlass104 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1',\n", + "'CenterNet HourGlass104 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024/1',\n", + "'CenterNet HourGlass104 Keypoints 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024_kpts/1',\n", + "'CenterNet Resnet50 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1',\n", + "'CenterNet Resnet50 V1 FPN Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512_kpts/1',\n", + "'CenterNet Resnet101 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet101v1_fpn_512x512/1',\n", + "'CenterNet Resnet50 V2 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512/1',\n", + "'CenterNet Resnet50 V2 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512_kpts/1',\n", + "'EfficientDet D0 512x512' : 'https://tfhub.dev/tensorflow/efficientdet/d0/1',\n", + "'EfficientDet D1 640x640' : 'https://tfhub.dev/tensorflow/efficientdet/d1/1',\n", + "'EfficientDet D2 768x768' : 'https://tfhub.dev/tensorflow/efficientdet/d2/1',\n", + "'EfficientDet D3 896x896' : 'https://tfhub.dev/tensorflow/efficientdet/d3/1',\n", + "'EfficientDet D4 1024x1024' : 'https://tfhub.dev/tensorflow/efficientdet/d4/1',\n", + "'EfficientDet D5 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d5/1',\n", + "'EfficientDet D6 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d6/1',\n", + "'EfficientDet D7 1536x1536' : 'https://tfhub.dev/tensorflow/efficientdet/d7/1',\n", + "'SSD MobileNet v2 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2',\n", + "'SSD MobileNet V1 FPN 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1',\n", + "'SSD MobileNet V2 FPNLite 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1',\n", + "'SSD MobileNet V2 FPNLite 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_640x640/1',\n", + "'SSD ResNet50 V1 FPN 640x640 (RetinaNet50)' : 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_640x640/1',\n", + "'SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)' : 
'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_1024x1024/1',\n", + "'SSD ResNet101 V1 FPN 640x640 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_640x640/1',\n", + "'SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_1024x1024/1',\n", + "'SSD ResNet152 V1 FPN 640x640 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_640x640/1',\n", + "'SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_1024x1024/1',\n", + "'Faster R-CNN ResNet50 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1',\n", + "'Faster R-CNN ResNet50 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet50 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_800x1333/1',\n", + "'Faster R-CNN ResNet101 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_640x640/1',\n", + "'Faster R-CNN ResNet101 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet101 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_800x1333/1',\n", + "'Faster R-CNN ResNet152 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_640x640/1',\n", + "'Faster R-CNN ResNet152 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet152 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_800x1333/1',\n", + "'Faster R-CNN Inception ResNet V2 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1',\n", + "'Faster R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_1024x1024/1',\n", + "'Mask R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1'\n", + "}\n", + "\n", + "IMAGES_FOR_TEST = {\n", + " 'Beach' : 'models/research/object_detection/test_images/image2.jpg',\n", + " 'Dogs' : 'models/research/object_detection/test_images/image1.jpg',\n", + " # By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg\n", + " 'Naxos Taverna' : 'https://upload.wikimedia.org/wikipedia/commons/6/60/Naxos_Taverna.jpg',\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_Coleoptera_of_the_British_islands_(Plate_125)_(8592917784).jpg\n", + " 'Beatles' : 'https://upload.wikimedia.org/wikipedia/commons/1/1b/The_Coleoptera_of_the_British_islands_%28Plate_125%29_%288592917784%29.jpg',\n", + " # By Américo Toledano, Source: https://commons.wikimedia.org/wiki/File:Biblioteca_Maim%C3%B3nides,_Campus_Universitario_de_Rabanales_007.jpg\n", + " 'Phones' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/1024px-Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg',\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_smaller_British_birds_(8053836633).jpg\n", + " 'Birds' : 'https://upload.wikimedia.org/wikipedia/commons/0/09/The_smaller_British_birds_%288053836633%29.jpg',\n", + "}\n", + "\n", + "COCO17_HUMAN_POSE_KEYPOINTS = [(0, 1),\n", + " (0, 2),\n", + " (1, 3),\n", + " (2, 4),\n", + " (0, 5),\n", + " (0, 6),\n", + " (5, 7),\n", + " (7, 9),\n", + " (6, 8),\n", + " (8, 10),\n", + " (5, 6),\n", + " (5, 11),\n", + " (6, 12),\n", + " (11, 12),\n", + " (11, 13),\n", + " (13, 
15),\n", + "                               (12, 14),\n", + "                               (14, 16)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "14bNk1gzh0TN" + }, + "source": [ + "## Visualization tools\n", + "\n", + "To visualize the images with the detected boxes, keypoints and segmentation masks, we will use the TensorFlow Object Detection API. To install it, we will clone the repo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi28cqGGFWnY" + }, + "outputs": [], + "source": [ + "# Clone the tensorflow models repository\n", + "!git clone --depth 1 https://github.com/tensorflow/models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yX3pb_pXDjYA" + }, + "source": [ + "Install the Object Detection API:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NwdsBdGhFanc" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo apt install -y protobuf-compiler\n", + "cd models/research/\n", + "protoc object_detection/protos/*.proto --python_out=.\n", + "cp object_detection/packages/tf2/setup.py .\n", + "python -m pip install .\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3yDNgIx-kV7X" + }, + "source": [ + "Now we can import the dependencies we will need later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2JCeQU3fkayh" + }, + "outputs": [], + "source": [ + "from object_detection.utils import label_map_util\n", + "from object_detection.utils import visualization_utils as viz_utils\n", + "from object_detection.utils import ops as utils_ops\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NKtD0IeclbL5" + }, + "source": [ + "### Load label map data (for plotting)\n", + "\n", + "Label maps map index numbers to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.\n", + "\n", + "For simplicity, we load the label map from the same repository from which we cloned the Object Detection API code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5mucYUS6exUJ" + }, + "outputs": [], + "source": [ + "PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'\n", + "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6917xnUSlp9x" + }, + "source": [ + "## Build a detection model and load pre-trained model weights\n", + "\n", + "Here we choose which Object Detection model we will use.\n", + "Select the architecture and it will be loaded automatically.\n", + "If you want to try other architectures later, just change the next cell and execute the following ones.\n", + "\n", + "**Tip:** if you want to read more details about the selected model, you can follow the link (the model handle) and read additional documentation on TF Hub. After you select a model, we will print the handle to make it easier."
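As an aside, if you want to sanity-check a single handle before committing to it in the form below, a minimal sketch along these lines works. This is illustrative only: it assumes the `ALL_MODELS` map defined earlier, picks one entry, and downloads that model.

```python
import numpy as np
import tensorflow_hub as hub

# Illustrative only: pick one handle from the ALL_MODELS map defined above.
demo_handle = ALL_MODELS['SSD MobileNet v2 320x320']
demo_detector = hub.load(demo_handle)

# Detectors in this collection take a uint8 batch of shape (1, height, width, 3).
dummy_image = np.zeros((1, 320, 320, 3), dtype=np.uint8)
demo_outputs = demo_detector(dummy_image)

# Typical keys include 'detection_boxes', 'detection_scores', 'detection_classes'.
print(sorted(demo_outputs.keys()))
```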
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HtwrSqvakTNn" + }, + "outputs": [], + "source": [ + "#@title Model Selection { display-mode: \"form\", run: \"auto\" }\n", + "model_display_name = 'CenterNet HourGlass104 Keypoints 512x512' # @param ['CenterNet HourGlass104 512x512','CenterNet HourGlass104 Keypoints 512x512','CenterNet HourGlass104 1024x1024','CenterNet HourGlass104 Keypoints 1024x1024','CenterNet Resnet50 V1 FPN 512x512','CenterNet Resnet50 V1 FPN Keypoints 512x512','CenterNet Resnet101 V1 FPN 512x512','CenterNet Resnet50 V2 512x512','CenterNet Resnet50 V2 Keypoints 512x512','EfficientDet D0 512x512','EfficientDet D1 640x640','EfficientDet D2 768x768','EfficientDet D3 896x896','EfficientDet D4 1024x1024','EfficientDet D5 1280x1280','EfficientDet D6 1280x1280','EfficientDet D7 1536x1536','SSD MobileNet v2 320x320','SSD MobileNet V1 FPN 640x640','SSD MobileNet V2 FPNLite 320x320','SSD MobileNet V2 FPNLite 640x640','SSD ResNet50 V1 FPN 640x640 (RetinaNet50)','SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)','SSD ResNet101 V1 FPN 640x640 (RetinaNet101)','SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)','SSD ResNet152 V1 FPN 640x640 (RetinaNet152)','SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)','Faster R-CNN ResNet50 V1 640x640','Faster R-CNN ResNet50 V1 1024x1024','Faster R-CNN ResNet50 V1 800x1333','Faster R-CNN ResNet101 V1 640x640','Faster R-CNN ResNet101 V1 1024x1024','Faster R-CNN ResNet101 V1 800x1333','Faster R-CNN ResNet152 V1 640x640','Faster R-CNN ResNet152 V1 1024x1024','Faster R-CNN ResNet152 V1 800x1333','Faster R-CNN Inception ResNet V2 640x640','Faster R-CNN Inception ResNet V2 1024x1024','Mask R-CNN Inception ResNet V2 1024x1024']\n", + "model_handle = ALL_MODELS[model_display_name]\n", + "\n", + "print('Selected model:'+ model_display_name)\n", + "print('Model Handle at TensorFlow Hub: {}'.format(model_handle))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "muhUt-wWL582" + }, + "source": [ + "## Loading the selected model from TensorFlow Hub\n", + "\n", + "Here we just need the model handle that was selected and use the Tensorflow Hub library to load it to memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rBuD07fLlcEO" + }, + "outputs": [], + "source": [ + "print('loading model...')\n", + "hub_model = hub.load(model_handle)\n", + "print('model loaded!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GIawRDKPPnd4" + }, + "source": [ + "## Loading an image\n", + "\n", + "Let's try the model on a simple image. To help with this, we provide a list of test images.\n", + "\n", + "Here are some simple things to try out if you are curious:\n", + "* Try running inference on your own images, just upload them to colab and load the same way it's done in the cell below.\n", + "* Modify some of the input images and see if detection still works. Some simple things to try out here include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).\n", + "\n", + "**Be careful:** when using images with an alpha channel, the model expect 3 channels images and the alpha will count as a 4th.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hX-AWUQ1wIEr" + }, + "outputs": [], + "source": [ + "#@title Image Selection (don't forget to execute the cell!) 
{ display-mode: \"form\"}\n", + "selected_image = 'Beach' # @param ['Beach', 'Dogs', 'Naxos Taverna', 'Beatles', 'Phones', 'Birds']\n", + "flip_image_horizontally = False #@param {type:\"boolean\"}\n", + "convert_image_to_grayscale = False #@param {type:\"boolean\"}\n", + "\n", + "image_path = IMAGES_FOR_TEST[selected_image]\n", + "image_np = load_image_into_numpy_array(image_path)\n", + "\n", + "# Flip horizontally\n", + "if(flip_image_horizontally):\n", + "  image_np[0] = np.fliplr(image_np[0]).copy()\n", + "\n", + "# Convert image to grayscale\n", + "if(convert_image_to_grayscale):\n", + "  image_np[0] = np.tile(\n", + "    np.mean(image_np[0], 2, keepdims=True), (1, 1, 3)).astype(np.uint8)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np[0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FTHsFjR6HNwb" + }, + "source": [ + "## Doing the inference\n", + "\n", + "To run inference, we just need to call the TF Hub model we loaded.\n", + "\n", + "Things you can try:\n", + "* Print out `result['detection_boxes']` and try to match the box locations to the boxes in the image. Notice that coordinates are given in normalized form (i.e., in the interval [0, 1]).\n", + "* Inspect the other output keys present in the result. Full documentation can be found on the model's documentation page (point your browser to the model handle printed earlier)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Gb_siXKcnnGC" + }, + "outputs": [], + "source": [ + "# Running inference\n", + "results = hub_model(image_np)\n", + "\n", + "# Different object detection models produce additional outputs;\n", + "# all of them are explained in the documentation.\n", + "result = {key:value.numpy() for key,value in results.items()}\n", + "print(result.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IZ5VYaBoeeFM" + }, + "source": [ + "## Visualizing the results\n", + "\n", + "Here is where we need the TensorFlow Object Detection API to draw the boxes from the inference step (and the keypoints, when available).\n", + "\n", + "The full documentation of this method can be seen [here](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py).\n", + "\n", + "Here you can, for example, set `min_score_thresh` to other values (between 0 and 1) to allow more detections through or to filter out more detections."
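To make the effect of `min_score_thresh` concrete, here is a small sketch (assuming the `result` dictionary computed above) that counts how many detections clear a given score cutoff:

```python
import numpy as np

def count_confident_detections(result, min_score_thresh=0.30):
    # result['detection_scores'] has shape (1, num_detections); index 0 is our single image.
    scores = np.asarray(result['detection_scores'][0])
    return int((scores >= min_score_thresh).sum())

# Raising the threshold keeps only the most confident boxes; lowering it lets more in.
print(count_confident_detections(result, 0.30))
print(count_confident_detections(result, 0.80))
```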
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2O7rV8g9s8Bz" + }, + "outputs": [], + "source": [ + "label_id_offset = 0\n", + "image_np_with_detections = image_np.copy()\n", + "\n", + "# Use keypoints if available in detections\n", + "keypoints, keypoint_scores = None, None\n", + "if 'detection_keypoints' in result:\n", + " keypoints = result['detection_keypoints'][0]\n", + " keypoint_scores = result['detection_keypoint_scores'][0]\n", + "\n", + "viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_detections[0],\n", + " result['detection_boxes'][0],\n", + " (result['detection_classes'][0] + label_id_offset).astype(int),\n", + " result['detection_scores'][0],\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " max_boxes_to_draw=200,\n", + " min_score_thresh=.30,\n", + " agnostic_mode=False,\n", + " keypoints=keypoints,\n", + " keypoint_scores=keypoint_scores,\n", + " keypoint_edges=COCO17_HUMAN_POSE_KEYPOINTS)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np_with_detections[0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qaw6Xi08NpEP" + }, + "source": [ + "## [Optional]\n", + "\n", + "Among the available object detection models there's Mask R-CNN and the output of this model allows instance segmentation.\n", + "\n", + "To visualize it we will use the same method we did before but adding an additional parameter: `instance_masks=output_dict.get('detection_masks_reframed', None)`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zl3qdtR1OvM_" + }, + "outputs": [], + "source": [ + "# Handle models with masks:\n", + "image_np_with_mask = image_np.copy()\n", + "\n", + "if 'detection_masks' in result:\n", + " # we need to convert np.arrays to tensors\n", + " detection_masks = tf.convert_to_tensor(result['detection_masks'][0])\n", + " detection_boxes = tf.convert_to_tensor(result['detection_boxes'][0])\n", + "\n", + " # Reframe the bbox mask to the image size.\n", + " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", + " detection_masks, detection_boxes,\n", + " image_np.shape[1], image_np.shape[2])\n", + " detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,\n", + " tf.uint8)\n", + " result['detection_masks_reframed'] = detection_masks_reframed.numpy()\n", + "\n", + "viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_mask[0],\n", + " result['detection_boxes'][0],\n", + " (result['detection_classes'][0] + label_id_offset).astype(int),\n", + " result['detection_scores'][0],\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " max_boxes_to_draw=200,\n", + " min_score_thresh=.30,\n", + " agnostic_mode=False,\n", + " instance_masks=result.get('detection_masks_reframed', None),\n", + " line_thickness=8)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np_with_mask[0])\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "tf2_object_detection.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb b/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb new file mode 100644 index 00000000000..786065ff5a5 --- /dev/null +++ 
b/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb @@ -0,0 +1,790 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ACbjNjyO4f_8" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCM50vaM4jiK" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9qOVy-_vmuUP" + }, + "source": [ + "# Semantic Search with Approximate Nearest Neighbors and Text Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3T4d77AJaKte" + }, + "source": [ + "This tutorial illustrates how to generate embeddings from a [TensorFlow Hub](https://tfhub.dev) (TF-Hub) model given input data, and build an approximate nearest neighbours (ANN) index using the extracted embeddings. The index can then be used for real-time similarity matching and retrieval.\n", + "\n", + "When dealing with a large corpus of data, it's not efficient to perform exact matching by scanning the whole repository to find the most similar items to a given query in real-time. Thus, we use an approximate similarity matching algorithm which allows us to trade off a little bit of accuracy in finding exact nearest neighbor matches for a significant boost in speed.\n", + "\n", + "In this tutorial, we show an example of real-time text search over a corpus of news headlines to find the headlines that are most similar to a query. Unlike keyword search, this captures the semantic similarity encoded in the text embedding.\n", + "\n", + "The steps of this tutorial are:\n", + "1. Download sample data.\n", + "2. Generate embeddings for the data using a TF-Hub model\n", + "3. Build an ANN index for the embeddings\n", + "4. Use the index for similarity matching\n", + "\n", + "We use [Apache Beam](https://beam.apache.org/documentation/programming-guide/) to generate the embeddings from the TF-Hub model. We also use Spotify's [ANNOY](https://github.com/spotify/annoy) library to build the approximate nearest neighbor index." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nM17v_mEVSnd" + }, + "source": [ + "### More models\n", + "For models that have the same architecture but were trained on a different language, refer to [this](https://tfhub.dev/google/collections/nnlm/1) collection. [Here](https://tfhub.dev/s?module-type=text-embedding) you can find all text embeddings that are currently hosted on [tfhub.dev](https://tfhub.dev/). " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q0jr0QK9qO5P" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "whMRj9qeqed4" + }, + "source": [ + "Install the required libraries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qmXkLPoaqS--" + }, + "outputs": [], + "source": [ + "!pip install apache_beam\n", + "!pip install 'scikit_learn~=0.23.0' # For gaussian_random_matrix.\n", + "!pip install annoy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A-vBZiCCqld0" + }, + "source": [ + "Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6NTYbdWcseuK" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import pickle\n", + "from collections import namedtuple\n", + "from datetime import datetime\n", + "import numpy as np\n", + "import apache_beam as beam\n", + "from apache_beam.transforms import util\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import annoy\n", + "from sklearn.random_projection import gaussian_random_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tx0SZa6-7b-f" + }, + "outputs": [], + "source": [ + "print('TF version: {}'.format(tf.__version__))\n", + "print('TF-Hub version: {}'.format(hub.__version__))\n", + "print('Apache Beam version: {}'.format(beam.__version__))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P6Imq876rLWx" + }, + "source": [ + "## 1. Download Sample Data\n", + "\n", + "[A Million News Headlines](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SYBGZL#) dataset contains news headlines published over a period of 15 years sourced from the reputable Australian Broadcasting Corp. (ABC). This news dataset has a summarised historical record of noteworthy events in the globe from early-2003 to end-2017 with a more granular focus on Australia. \n", + "\n", + "**Format**: Tab-separated two-column data: 1) publication date and 2) headline text. We are only interested in the headline text.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OpF57n8e5C9D" + }, + "outputs": [], + "source": [ + "!wget 'https://dataverse.harvard.edu/api/access/datafile/3450625?format=tab&gbrecs=true' -O raw.tsv\n", + "!wc -l raw.tsv\n", + "!head raw.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Reeoc9z0zTxJ" + }, + "source": [ + "For simplicity, we only keep the headline text and remove the publication date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INPWa4upv_yJ" + }, + "outputs": [], + "source": [ + "!rm -r corpus\n", + "!mkdir corpus\n", + "\n", + "with open('corpus/text.txt', 'w') as out_file:\n", + " with open('raw.tsv', 'r') as in_file:\n", + " for line in in_file:\n", + " headline = line.split('\\t')[1].strip().strip('\"')\n", + " out_file.write(headline+\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5-oedX40z6o2" + }, + "outputs": [], + "source": [ + "!tail corpus/text.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2AngMtH50jNb" + }, + "source": [ + "## 2. Generate Embeddings for the Data.\n", + "\n", + "In this tutorial, we use the [Neural Network Language Model (NNLM)](https://tfhub.dev/google/nnlm-en-dim128/2) to generate embeddings for the headline data. The sentence embeddings can then be easily used to compute sentence level meaning similarity. We run the embedding generation process using Apache Beam." 
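Before wiring the model into the Beam pipeline below, it can help to see what these embeddings give you directly. A minimal sketch, using the same NNLM handle as this tutorial but two made-up headlines (not taken from the dataset), that embeds two sentences and compares them with cosine similarity:

```python
import numpy as np
import tensorflow_hub as hub

# Load the NNLM sentence embedding module used throughout this tutorial.
embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/2')

# Two made-up headlines for illustration.
sentences = ["severe storms lash the queensland coast",
             "cyclone warning issued for northern australia"]
vectors = embed(sentences).numpy()  # shape: (2, 128)

def cosine_similarity(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Semantically related headlines should score noticeably higher than unrelated ones.
print(cosine_similarity(vectors[0], vectors[1]))
```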
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_DvXnDB1pEX" + }, + "source": [ + "### Embedding extraction method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yL7OEY1E0A35" + }, + "outputs": [], + "source": [ + "embed_fn = None\n", + "\n", + "def generate_embeddings(text, model_url, random_projection_matrix=None):\n", + " # Beam will run this function in different processes that need to\n", + " # import hub and load embed_fn (if not previously loaded)\n", + " global embed_fn\n", + " if embed_fn is None:\n", + " embed_fn = hub.load(model_url)\n", + " embedding = embed_fn(text).numpy()\n", + " if random_projection_matrix is not None:\n", + " embedding = embedding.dot(random_projection_matrix)\n", + " return text, embedding\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g6pXBVxsVUbm" + }, + "source": [ + "### Convert to tf.Example method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMjqjWZNVVzd" + }, + "outputs": [], + "source": [ + "def to_tf_example(entries):\n", + " examples = []\n", + "\n", + " text_list, embedding_list = entries\n", + " for i in range(len(text_list)):\n", + " text = text_list[i]\n", + " embedding = embedding_list[i]\n", + "\n", + " features = {\n", + " 'text': tf.train.Feature(\n", + " bytes_list=tf.train.BytesList(value=[text.encode('utf-8')])),\n", + " 'embedding': tf.train.Feature(\n", + " float_list=tf.train.FloatList(value=embedding.tolist()))\n", + " }\n", + " \n", + " example = tf.train.Example(\n", + " features=tf.train.Features(\n", + " feature=features)).SerializeToString(deterministic=True)\n", + " \n", + " examples.append(example)\n", + " \n", + " return examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gDiV4uQCVYGH" + }, + "source": [ + "### Beam pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCGUIB172m2G" + }, + "outputs": [], + "source": [ + "def run_hub2emb(args):\n", + " '''Runs the embedding generation pipeline'''\n", + "\n", + " options = beam.options.pipeline_options.PipelineOptions(**args)\n", + " args = namedtuple(\"options\", args.keys())(*args.values())\n", + "\n", + " with beam.Pipeline(args.runner, options=options) as pipeline:\n", + " (\n", + " pipeline\n", + " | 'Read sentences from files' >> beam.io.ReadFromText(\n", + " file_pattern=args.data_dir)\n", + " | 'Batch elements' >> util.BatchElements(\n", + " min_batch_size=args.batch_size, max_batch_size=args.batch_size)\n", + " | 'Generate embeddings' >> beam.Map(\n", + " generate_embeddings, args.model_url, args.random_projection_matrix)\n", + " | 'Encode to tf example' >> beam.FlatMap(to_tf_example)\n", + " | 'Write to TFRecords files' >> beam.io.WriteToTFRecord(\n", + " file_path_prefix='{}/emb'.format(args.output_dir),\n", + " file_name_suffix='.tfrecords')\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nlbQdiYNVvne" + }, + "source": [ + "### Generating Random Projection Weight Matrix\n", + "\n", + "[Random projection](https://en.wikipedia.org/wiki/Random_projection) is a simple, yet powerful technique used to reduce the dimensionality of a set of points which lie in Euclidean space. 
For a theoretical background, see the [Johnson-Lindenstrauss lemma](https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma).\n", + "\n", + "Reducing the dimensionality of the embeddings with random projection means less time needed to build and query the ANN index.\n", + "\n", + "In this tutorial we use [Gaussian Random Projection](https://en.wikipedia.org/wiki/Random_projection#Gaussian_random_projection) from the [Scikit-learn](https://scikit-learn.org/stable/modules/random_projection.html#gaussian-random-projection) library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1yw1xgtNVv52" + }, + "outputs": [], + "source": [ + "def generate_random_projection_weights(original_dim, projected_dim):\n", + " random_projection_matrix = None\n", + " random_projection_matrix = gaussian_random_matrix(\n", + " n_components=projected_dim, n_features=original_dim).T\n", + " print(\"A Gaussian random weight matrix was creates with shape of {}\".format(random_projection_matrix.shape))\n", + " print('Storing random projection matrix to disk...')\n", + " with open('random_projection_matrix', 'wb') as handle:\n", + " pickle.dump(random_projection_matrix, \n", + " handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " \n", + " return random_projection_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aJZUfT3NE7kj" + }, + "source": [ + "### Set parameters\n", + "If you want to build an index using the original embedding space without random projection, set the `projected_dim` parameter to `None`. Note that this will slow down the indexing step for high-dimensional embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "77-Cow7uE74T" + }, + "outputs": [], + "source": [ + "model_url = 'https://tfhub.dev/google/nnlm-en-dim128/2' #@param {type:\"string\"}\n", + "projected_dim = 64 #@param {type:\"number\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "On-MbzD922kb" + }, + "source": [ + "### Run pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y3I1Wv4i21yY" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "output_dir = tempfile.mkdtemp()\n", + "original_dim = hub.load(model_url)(['']).shape[1]\n", + "random_projection_matrix = None\n", + "\n", + "if projected_dim:\n", + " random_projection_matrix = generate_random_projection_weights(\n", + " original_dim, projected_dim)\n", + "\n", + "args = {\n", + " 'job_name': 'hub2emb-{}'.format(datetime.utcnow().strftime('%y%m%d-%H%M%S')),\n", + " 'runner': 'DirectRunner',\n", + " 'batch_size': 1024,\n", + " 'data_dir': 'corpus/*.txt',\n", + " 'output_dir': output_dir,\n", + " 'model_url': model_url,\n", + " 'random_projection_matrix': random_projection_matrix,\n", + "}\n", + "\n", + "print(\"Pipeline args are set.\")\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iS9obmeP4ZOA" + }, + "outputs": [], + "source": [ + "print(\"Running pipeline...\")\n", + "%time run_hub2emb(args)\n", + "print(\"Pipeline is done.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JAwOo7gQWvVd" + }, + "outputs": [], + "source": [ + "!ls {output_dir}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVnee4e6U90u" + }, + "source": [ + "Read some of the generated embeddings..." 
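As a quick aside before inspecting the output files: the reason the random projection above is safe is that pairwise geometry is approximately preserved in the lower-dimensional space (the Johnson-Lindenstrauss lemma mentioned earlier). A self-contained NumPy sketch with synthetic vectors (not the headline embeddings) illustrates the idea:

```python
import numpy as np

rng = np.random.RandomState(42)
original_dim, projected_dim = 128, 64

# Two synthetic "embeddings": b is a noisy neighbour of a.
a = rng.normal(size=original_dim)
b = a + 0.3 * rng.normal(size=original_dim)

# Gaussian random projection matrix, scaled so vector lengths stay comparable.
projection = rng.normal(size=(original_dim, projected_dim)) / np.sqrt(projected_dim)

def cosine(u, v):
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))

# Cosine similarity before and after projection should be close to each other.
print(cosine(a, b), cosine(a @ projection, b @ projection))
```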
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-K7pGXlXOj1N" + }, + "outputs": [], + "source": [ + "embed_file = os.path.join(output_dir, 'emb-00000-of-00001.tfrecords')\n", + "sample = 5\n", + "\n", + "# Create a description of the features.\n", + "feature_description = {\n", + " 'text': tf.io.FixedLenFeature([], tf.string),\n", + " 'embedding': tf.io.FixedLenFeature([projected_dim], tf.float32)\n", + "}\n", + "\n", + "def _parse_example(example):\n", + " # Parse the input `tf.Example` proto using the dictionary above.\n", + " return tf.io.parse_single_example(example, feature_description)\n", + "\n", + "dataset = tf.data.TFRecordDataset(embed_file)\n", + "for record in dataset.take(sample).map(_parse_example):\n", + " print(\"{}: {}\".format(record['text'].numpy().decode('utf-8'), record['embedding'].numpy()[:10]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agGoaMSgY8wN" + }, + "source": [ + "## 3. Build the ANN Index for the Embeddings\n", + "\n", + "[ANNOY](https://github.com/spotify/annoy) (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mapped into memory. It is built and used by [Spotify](https://www.spotify.com) for music recommendations. If you are interested you can play along with other alternatives to ANNOY such as [NGT](https://github.com/yahoojapan/NGT), [FAISS](https://github.com/facebookresearch/faiss), etc. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UcPDspU3WjgH" + }, + "outputs": [], + "source": [ + "def build_index(embedding_files_pattern, index_filename, vector_length, \n", + " metric='angular', num_trees=100):\n", + " '''Builds an ANNOY index'''\n", + "\n", + " annoy_index = annoy.AnnoyIndex(vector_length, metric=metric)\n", + " # Mapping between the item and its identifier in the index\n", + " mapping = {}\n", + "\n", + " embed_files = tf.io.gfile.glob(embedding_files_pattern)\n", + " num_files = len(embed_files)\n", + " print('Found {} embedding file(s).'.format(num_files))\n", + "\n", + " item_counter = 0\n", + " for i, embed_file in enumerate(embed_files):\n", + " print('Loading embeddings in file {} of {}...'.format(i+1, num_files))\n", + " dataset = tf.data.TFRecordDataset(embed_file)\n", + " for record in dataset.map(_parse_example):\n", + " text = record['text'].numpy().decode(\"utf-8\")\n", + " embedding = record['embedding'].numpy()\n", + " mapping[item_counter] = text\n", + " annoy_index.add_item(item_counter, embedding)\n", + " item_counter += 1\n", + " if item_counter % 100000 == 0:\n", + " print('{} items loaded to the index'.format(item_counter))\n", + "\n", + " print('A total of {} items added to the index'.format(item_counter))\n", + "\n", + " print('Building the index with {} trees...'.format(num_trees))\n", + " annoy_index.build(n_trees=num_trees)\n", + " print('Index is successfully built.')\n", + " \n", + " print('Saving index to disk...')\n", + " annoy_index.save(index_filename)\n", + " print('Index is saved to disk.')\n", + " print(\"Index file size: {} GB\".format(\n", + " round(os.path.getsize(index_filename) / float(1024 ** 3), 2)))\n", + " annoy_index.unload()\n", + "\n", + " print('Saving mapping to disk...')\n", + " with open(index_filename + '.mapping', 'wb') as handle:\n", + " pickle.dump(mapping, handle, 
protocol=pickle.HIGHEST_PROTOCOL)\n", + " print('Mapping is saved to disk.')\n", + " print(\"Mapping file size: {} MB\".format(\n", + " round(os.path.getsize(index_filename + '.mapping') / float(1024 ** 2), 2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AgyOQhUq6FNE" + }, + "outputs": [], + "source": [ + "embedding_files = \"{}/emb-*.tfrecords\".format(output_dir)\n", + "embedding_dimension = projected_dim\n", + "index_filename = \"index\"\n", + "\n", + "!rm {index_filename}\n", + "!rm {index_filename}.mapping\n", + "\n", + "%time build_index(embedding_files, index_filename, embedding_dimension)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ic31Tm5cgAd5" + }, + "outputs": [], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "maGxDl8ufP-p" + }, + "source": [ + "## 4. Use the Index for Similarity Matching\n", + "Now we can use the ANN index to find news headlines that are semantically close to an input query." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dIs8W78fYPp" + }, + "source": [ + "### Load the index and the mapping files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jlTTrbQHayvb" + }, + "outputs": [], + "source": [ + "index = annoy.AnnoyIndex(embedding_dimension)\n", + "index.load(index_filename, prefault=True)\n", + "print('Annoy index is loaded.')\n", + "with open(index_filename + '.mapping', 'rb') as handle:\n", + " mapping = pickle.load(handle)\n", + "print('Mapping file is loaded.')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y6liFMSUh08J" + }, + "source": [ + "### Similarity matching method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mUxjTag8hc16" + }, + "outputs": [], + "source": [ + "def find_similar_items(embedding, num_matches=5):\n", + " '''Finds similar items to a given embedding in the ANN index'''\n", + " ids = index.get_nns_by_vector(\n", + " embedding, num_matches, search_k=-1, include_distances=False)\n", + " items = [mapping[i] for i in ids]\n", + " return items" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hjerNpmZja0A" + }, + "source": [ + "### Extract embedding from a given query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a0IIXzfBjZ19" + }, + "outputs": [], + "source": [ + "# Load the TF-Hub model\n", + "print(\"Loading the TF-Hub model...\")\n", + "%time embed_fn = hub.load(model_url)\n", + "print(\"TF-Hub model is loaded.\")\n", + "\n", + "random_projection_matrix = None\n", + "if os.path.exists('random_projection_matrix'):\n", + " print(\"Loading random projection matrix...\")\n", + " with open('random_projection_matrix', 'rb') as handle:\n", + " random_projection_matrix = pickle.load(handle)\n", + " print('random projection matrix is loaded.')\n", + "\n", + "def extract_embeddings(query):\n", + " '''Generates the embedding for the query'''\n", + " query_embedding = embed_fn([query])[0].numpy()\n", + " if random_projection_matrix is not None:\n", + " query_embedding = query_embedding.dot(random_projection_matrix)\n", + " return query_embedding\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kCoCNROujEIO" + }, + "outputs": [], + "source": [ + "extract_embeddings(\"Hello Machine Learning!\")[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "koINo8Du--8C" + }, + "source": [ + "### 
Enter a query to find the most similar items" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "wC0uLjvfk5nB" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "query = \"confronting global challenges\" #@param {type:\"string\"}\n", + "\n", + "print(\"Generating embedding for the query...\")\n", + "%time query_embedding = extract_embeddings(query)\n", + "\n", + "print(\"\")\n", + "print(\"Finding relevant items in the index...\")\n", + "%time items = find_similar_items(query_embedding, 10)\n", + "\n", + "print(\"\")\n", + "print(\"Results:\")\n", + "print(\"=========\")\n", + "for item in items:\n", + " print(item)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TkRSqs77tDuX" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](https://www.tensorflow.org/) and see the TF-Hub API documentation at [tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub models at [tfhub.dev](https://tfhub.dev/) including more text embedding models and image feature vector models.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ACbjNjyO4f_8", + "g6pXBVxsVUbm" + ], + "name": "tf2_semantic_approximate_nearest_neighbors.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_text_classification.ipynb b/site/en/hub/tutorials/tf2_text_classification.ipynb new file mode 100644 index 00000000000..e2dae15bde0 --- /dev/null +++ b/site/en/hub/tutorials/tf2_text_classification.ipynb @@ -0,0 +1,571 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ic4_occAAiAT" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "ioaprt5q5US7" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yCl0eTNH5RS3" + }, + "outputs": [], + "source": [ + "#@title MIT License\n", + "#\n", + "# Copyright (c) 2017 François Chollet # IGNORE_COPYRIGHT: cleared by OSS licensing\n", + "#\n", + "# Permission is hereby granted, free of charge, to any person obtaining a\n", + "# copy of this software and associated documentation files (the \"Software\"),\n", + "# to deal in the Software without restriction, including without limitation\n", + "# the rights to use, copy, modify, merge, publish, distribute, sublicense,\n", + "# and/or sell copies of the Software, and to permit persons to whom the\n", + "# Software is furnished to do so, subject to the following conditions:\n", + "#\n", + "# The above copyright notice and this permission notice shall be included in\n", + "# all copies or substantial portions of the Software.\n", + "#\n", + "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n", + "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n", + "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n", + "# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n", + "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n", + "# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n", + "# DEALINGS IN THE SOFTWARE." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ItXfxkxvosLH" + }, + "source": [ + "# Text Classification with Movie Reviews" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Eg62Pmz3o83v" + }, + "source": [ + "This notebook classifies movie reviews as *positive* or *negative* using the text of the review. This is an example of *binary*—or two-class—classification, an important and widely applicable kind of machine learning problem. \n", + "\n", + "We'll use the [IMDB dataset](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/imdb) that contains the text of 50,000 movie reviews from the [Internet Movie Database](https://www.imdb.com/). These are split into 25,000 reviews for training and 25,000 reviews for testing. The training and testing sets are *balanced*, meaning they contain an equal number of positive and negative reviews. \n", + "\n", + "This notebook uses [tf.keras](https://www.tensorflow.org/api_docs/python/tf/keras), a high-level API to build and train models in TensorFlow, and [TensorFlow Hub](https://www.tensorflow.org/hub), a library and platform for transfer learning. For a more advanced text classification tutorial using `tf.keras`, see the [MLCC Text Classification Guide](https://developers.google.com/machine-learning/guides/text-classification/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qrk8NjzhSBh-" + }, + "source": [ + "### More models\n", + "[Here](https://tfhub.dev/s?module-type=text-embedding) you can find more expressive or performant models that you could use to generate the text embedding." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ew7HTbPpCJH" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_datasets as tfds\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "print(\"Version: \", tf.__version__)\n", + "print(\"Eager mode: \", tf.executing_eagerly())\n", + "print(\"Hub version: \", hub.__version__)\n", + "print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iAsKG535pHep" + }, + "source": [ + "## Download the IMDB dataset\n", + "\n", + "The IMDB dataset is available on [TensorFlow datasets](https://github.com/tensorflow/datasets). The following code downloads the IMDB dataset to your machine (or the colab runtime):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zXXx5Oc3pOmN" + }, + "outputs": [], + "source": [ + "train_data, test_data = tfds.load(name=\"imdb_reviews\", split=[\"train\", \"test\"], \n", + " batch_size=-1, as_supervised=True)\n", + "\n", + "train_examples, train_labels = tfds.as_numpy(train_data)\n", + "test_examples, test_labels = tfds.as_numpy(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l50X3GfjpU4r" + }, + "source": [ + "## Explore the data \n", + "\n", + "Let's take a moment to understand the format of the data. Each example is a sentence representing the movie review and a corresponding label. The sentence is not preprocessed in any way. The label is an integer value of either 0 or 1, where 0 is a negative review, and 1 is a positive review." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8qCnve_-lkO" + }, + "outputs": [], + "source": [ + "print(\"Training entries: {}, test entries: {}\".format(len(train_examples), len(test_examples)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RnKvHWW4-lkW" + }, + "source": [ + "Let's print the first 10 examples." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QtTS4kpEpjbi" + }, + "outputs": [], + "source": [ + "train_examples[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IFtaCHTdc-GY" + }, + "source": [ + "Let's also print the first 10 labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tvAjVXOWc6Mj" + }, + "outputs": [], + "source": [ + "train_labels[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LLC02j2g-llC" + }, + "source": [ + "## Build the model\n", + "\n", + "The neural network is created by stacking layers—this requires three main architectural decisions:\n", + "\n", + "* How to represent the text?\n", + "* How many layers to use in the model?\n", + "* How many *hidden units* to use for each layer?\n", + "\n", + "In this example, the input data consists of sentences. The labels to predict are either 0 or 1.\n", + "\n", + "One way to represent the text is to convert sentences into embedding vectors. We can use a pre-trained text embedding as the first layer, which will have two advantages:\n", + "* we don't have to worry about text preprocessing,\n", + "* we can benefit from transfer learning.\n", + "\n", + "For this example we will use a model from [TensorFlow Hub](https://www.tensorflow.org/hub) called [google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2).\n", + "\n", + "There are two other models to test for the sake of this tutorial:\n", + "* [google/nnlm-en-dim50-with-normalization/2](https://tfhub.dev/google/nnlm-en-dim50-with-normalization/2) - same as [google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2), but with additional text normalization to remove punctuation. This can help to get better coverage of in-vocabulary embeddings for tokens in your input text.\n", + "* [google/nnlm-en-dim128-with-normalization/2](https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2) - A larger model with an embedding dimension of 128 instead of the smaller 50." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "In2nDpTLkgKa" + }, + "source": [ + "Let's first create a Keras layer that uses a TensorFlow Hub model to embed the sentences, and try it out on a couple of input examples. Note that the output shape of the produced embeddings is as expected: `(num_examples, embedding_dimension)`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_NUbzVeYkgcO" + }, + "outputs": [], + "source": [ + "model = \"https://tfhub.dev/google/nnlm-en-dim50/2\"\n", + "hub_layer = hub.KerasLayer(model, input_shape=[], dtype=tf.string, trainable=True)\n", + "hub_layer(train_examples[:3])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dfSbV6igl1EH" + }, + "source": [ + "Let's now build the full model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xpKOoWgu-llD" + }, + "outputs": [], + "source": [ + "model = tf.keras.Sequential()\n", + "model.add(hub_layer)\n", + "model.add(tf.keras.layers.Dense(16, activation='relu'))\n", + "model.add(tf.keras.layers.Dense(1))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6PbKQ6mucuKL" + }, + "source": [ + "The layers are stacked sequentially to build the classifier:\n", + "\n", + "1. The first layer is a TensorFlow Hub layer. This layer uses a pre-trained Saved Model to map a sentence into its embedding vector. The model that we are using ([google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2)) splits the sentence into tokens, embeds each token and then combines the embeddings. The resulting dimensions are: `(num_examples, embedding_dimension)`.\n", + "2. This fixed-length output vector is piped through a fully-connected (`Dense`) layer with 16 hidden units.\n", + "3. The last layer is densely connected with a single output node. This outputs logits: the log-odds of the true class, according to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0XMwnDOp-llH" + }, + "source": [ + "### Hidden units\n", + "\n", + "The above model has two intermediate or \"hidden\" layers, between the input and output. The number of outputs (units, nodes, or neurons) is the dimension of the representational space for the layer. In other words, the amount of freedom the network is allowed when learning an internal representation.\n", + "\n", + "If a model has more hidden units (a higher-dimensional representation space), and/or more layers, then the network can learn more complex representations. However, it makes the network more computationally expensive and may lead to learning unwanted patterns—patterns that improve performance on training data but not on the test data. This is called *overfitting*, and we'll explore it later." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L4EqVWg4-llM" + }, + "source": [ + "### Loss function and optimizer\n", + "\n", + "A model needs a loss function and an optimizer for training. Since this is a binary classification problem and the model outputs logits (a single-unit layer with a linear activation), we'll use the `binary_crossentropy` loss function with `from_logits=True`. \n", + "\n", + "This isn't the only choice for a loss function; you could, for instance, choose `mean_squared_error`. 
But, generally, `binary_crossentropy` is better for dealing with probabilities—it measures the \"distance\" between probability distributions, or in our case, between the ground-truth distribution and the predictions.\n", + "\n", + "Later, when we are exploring regression problems (say, to predict the price of a house), we will see how to use another loss function called mean squared error.\n", + "\n", + "Now, configure the model to use an optimizer and a loss function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mr0GP-cQ-llN" + }, + "outputs": [], + "source": [ + "model.compile(optimizer='adam',\n", + " loss=tf.losses.BinaryCrossentropy(from_logits=True),\n", + " metrics=[tf.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hCWYwkug-llQ" + }, + "source": [ + "## Create a validation set\n", + "\n", + "When training, we want to check the accuracy of the model on data it hasn't seen before. Create a *validation set* by setting apart 10,000 examples from the original training data. (Why not use the testing set now? Our goal is to develop and tune our model using only the training data, then use the test data just once to evaluate our accuracy)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-NpcXY9--llS" + }, + "outputs": [], + "source": [ + "x_val = train_examples[:10000]\n", + "partial_x_train = train_examples[10000:]\n", + "\n", + "y_val = train_labels[:10000]\n", + "partial_y_train = train_labels[10000:]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "35jv_fzP-llU" + }, + "source": [ + "## Train the model\n", + "\n", + "Train the model for 40 epochs in mini-batches of 512 samples. This is 40 iterations over all samples in the `x_train` and `y_train` tensors. While training, monitor the model's loss and accuracy on the 10,000 samples from the validation set:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tXSGrjWZ-llW" + }, + "outputs": [], + "source": [ + "history = model.fit(partial_x_train,\n", + " partial_y_train,\n", + " epochs=40,\n", + " batch_size=512,\n", + " validation_data=(x_val, y_val),\n", + " verbose=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9EEGuDVuzb5r" + }, + "source": [ + "## Evaluate the model\n", + "\n", + "And let's see how the model performs. Two values will be returned. Loss (a number which represents our error, lower values are better), and accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zOMKywn4zReN" + }, + "outputs": [], + "source": [ + "results = model.evaluate(test_examples, test_labels)\n", + "\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z1iEXVTR0Z2t" + }, + "source": [ + "This fairly naive approach achieves an accuracy of about 87%. With more advanced approaches, the model should get closer to 95%." 
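One detail worth keeping in mind: the final `Dense(1)` layer outputs logits, so the raw predictions are not probabilities. Below is a minimal sketch (an illustrative addition, assuming the trained `model` and the `test_examples` array defined above) of turning a few predictions into probabilities with a sigmoid:

```python
# Convert the model's logits into probabilities for the first few test reviews.
logits = model.predict(test_examples[:5])
probs = tf.sigmoid(logits).numpy().flatten()
for review, p in zip(test_examples[:5], probs):
    print(f"P(positive) = {p:.3f} | {review[:60].decode('utf-8', errors='ignore')}")
```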
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5KggXVeL-llZ" + }, + "source": [ + "## Create a graph of accuracy and loss over time\n", + "\n", + "`model.fit()` returns a `History` object that contains a dictionary with everything that happened during training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VcvSXvhp-llb" + }, + "outputs": [], + "source": [ + "history_dict = history.history\n", + "history_dict.keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nRKsqL40-lle" + }, + "source": [ + "There are four entries: one for each monitored metric during training and validation. We can use these to plot the training and validation loss for comparison, as well as the training and validation accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nGoYf2Js-lle" + }, + "outputs": [], + "source": [ + "acc = history_dict['accuracy']\n", + "val_acc = history_dict['val_accuracy']\n", + "loss = history_dict['loss']\n", + "val_loss = history_dict['val_loss']\n", + "\n", + "epochs = range(1, len(acc) + 1)\n", + "\n", + "# \"bo\" is for \"blue dot\"\n", + "plt.plot(epochs, loss, 'bo', label='Training loss')\n", + "# b is for \"solid blue line\"\n", + "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", + "plt.title('Training and validation loss')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6hXx-xOv-llh" + }, + "outputs": [], + "source": [ + "plt.clf() # clear figure\n", + "\n", + "plt.plot(epochs, acc, 'bo', label='Training acc')\n", + "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n", + "plt.title('Training and validation accuracy')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Accuracy')\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oFEmZ5zq-llk" + }, + "source": [ + "In this plot, the dots represent the training loss and accuracy, and the solid lines are the validation loss and accuracy.\n", + "\n", + "Notice the training loss *decreases* with each epoch and the training accuracy *increases* with each epoch. This is expected when using a gradient descent optimization—it should minimize the desired quantity on every iteration.\n", + "\n", + "This isn't the case for the validation loss and accuracy—they seem to peak after about twenty epochs. This is an example of overfitting: the model performs better on the training data than it does on data it has never seen before. After this point, the model over-optimizes and learns representations *specific* to the training data that do not *generalize* to test data.\n", + "\n", + "For this particular case, we could prevent overfitting by simply stopping the training after twenty or so epochs. Later, you'll see how to do this automatically with a callback." 
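As a sketch of that callback (one reasonable configuration, not necessarily the exact one used later), `tf.keras.callbacks.EarlyStopping` can stop training once the validation loss stops improving, using the same model and data splits defined above:

```python
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',         # watch the validation loss
    patience=3,                 # tolerate a few epochs without improvement
    restore_best_weights=True)  # keep the weights from the best epoch

history = model.fit(partial_x_train, partial_y_train,
                    epochs=40, batch_size=512,
                    validation_data=(x_val, y_val),
                    callbacks=[early_stop], verbose=1)
```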
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "tf2_text_classification.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_delf_module.ipynb b/site/en/hub/tutorials/tf_hub_delf_module.ipynb new file mode 100644 index 00000000000..b6dec2eae00 --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_delf_module.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0DmDwGPOGfaQ" + }, + "source": [ + "# How to match images using DELF and TensorFlow Hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f3nk38tIKytQ" + }, + "source": [ + "TensorFlow Hub (TF-Hub) is a platform to share machine learning expertise packaged in reusable resources, notably pre-trained **modules**.\n", + "\n", + "In this colab, we will use a module that packages the [DELF](https://github.com/tensorflow/models/tree/master/research/delf) neural network and logic for processing images to identify keypoints and their descriptors. The weights of the neural network were trained on images of landmarks as described in [this paper](https://arxiv.org/abs/1612.06321)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lrKaWOB_cuS3" + }, + "outputs": [], + "source": [ + "!pip install scikit-image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SI7eVflHHxvi" + }, + "outputs": [], + "source": [ + "from absl import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from PIL import Image, ImageOps\n", + "from scipy.spatial import cKDTree\n", + "from skimage.feature import plot_matched_features\n", + "from skimage.measure import ransac\n", + "from skimage.transform import AffineTransform\n", + "from six import BytesIO\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "from six.moves.urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qquo2HiONiDK" + }, + "source": [ + "## The data\n", + "\n", + "In the next cell, we specify the URLs of two images we would like to process with DELF in order to match and compare them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "l93ye4WFIqIV" + }, + "outputs": [], + "source": [ + "#@title Choose images\n", + "images = \"Bridge of Sighs\" #@param [\"Bridge of Sighs\", \"Golden Gate\", \"Acropolis\", \"Eiffel tower\"]\n", + "if images == \"Bridge of Sighs\":\n", + " # from: https://commons.wikimedia.org/wiki/File:Bridge_of_Sighs,_Oxford.jpg\n", + " # by: N.H. Fischer\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/2/28/Bridge_of_Sighs%2C_Oxford.jpg'\n", + " # from https://commons.wikimedia.org/wiki/File:The_Bridge_of_Sighs_and_Sheldonian_Theatre,_Oxford.jpg\n", + " # by: Matthew Hoser\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/c/c3/The_Bridge_of_Sighs_and_Sheldonian_Theatre%2C_Oxford.jpg'\n", + "elif images == \"Golden Gate\":\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/1/1e/Golden_gate2.jpg'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/3/3e/GoldenGateBridge.jpg'\n", + "elif images == \"Acropolis\":\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/c/ce/2006_01_21_Ath%C3%A8nes_Parth%C3%A9non.JPG'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/5/5c/ACROPOLIS_1969_-_panoramio_-_jean_melis.jpg'\n", + "else:\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/d/d8/Eiffel_Tower%2C_November_15%2C_2011.jpg'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/a/a8/Eiffel_Tower_from_immediately_beside_it%2C_Paris_May_2008.jpg'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ttlHtcmiN6QF" + }, + "source": [ + "Download, resize, save and display the images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E6RMomGJSfeb" + }, + "outputs": [], + "source": [ + "def download_and_resize(name, url, new_width=256, new_height=256):\n", + " path = tf.keras.utils.get_file(url.split('/')[-1], url)\n", + " image = Image.open(path)\n", + " image = ImageOps.fit(image, (new_width, new_height), Image.LANCZOS)\n", + " return image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "reajtO7XSj7Y" + }, + "outputs": [], + "source": [ + "image1 = download_and_resize('image_1.jpg', IMAGE_1_URL)\n", + "image2 = download_and_resize('image_2.jpg', IMAGE_2_URL)\n", + "\n", + "plt.subplot(1,2,1)\n", + "plt.imshow(image1)\n", + "plt.subplot(1,2,2)\n", + "plt.imshow(image2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "leKqkoT9OP7r" + }, + "source": [ + "## Apply the DELF module to the data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A3WoT1-SPoTI" + }, + "source": [ + "The DELF module takes an image as input and will describe noteworthy points with vectors. The following cell contains the core of this colab's logic." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pXr2tUhvp1Ue" + }, + "outputs": [], + "source": [ + "delf = hub.load('https://tfhub.dev/google/delf/1').signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pvAU_gUHoYcY" + }, + "outputs": [], + "source": [ + "def run_delf(image):\n", + " np_image = np.array(image)\n", + " float_image = tf.image.convert_image_dtype(np_image, tf.float32)\n", + "\n", + " return delf(\n", + " image=float_image,\n", + " score_threshold=tf.constant(100.0),\n", + " image_scales=tf.constant([0.25, 0.3536, 0.5, 0.7071, 1.0, 1.4142, 2.0]),\n", + " max_feature_num=tf.constant(1000))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FEzgHAT0UDNP" + }, + "outputs": [], + "source": [ + "result1 = run_delf(image1)\n", + "result2 = run_delf(image2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NByyBA5yOL2b" + }, + "source": [ + "## Use the locations and description vectors to match the images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "mVaKXT3cMSib" + }, + "outputs": [], + "source": [ + "#@title TensorFlow is not needed for this post-processing and visualization\n", + "def match_images(image1, image2, result1, result2):\n", + " distance_threshold = 0.8\n", + "\n", + " # Read features.\n", + " num_features_1 = result1['locations'].shape[0]\n", + " print(\"Loaded image 1's %d features\" % num_features_1)\n", + " \n", + " num_features_2 = result2['locations'].shape[0]\n", + " print(\"Loaded image 2's %d features\" % num_features_2)\n", + "\n", + " # Find nearest-neighbor matches using a KD tree.\n", + " d1_tree = cKDTree(result1['descriptors'])\n", + " _, indices = d1_tree.query(\n", + " result2['descriptors'],\n", + " distance_upper_bound=distance_threshold)\n", + "\n", + " # Select feature locations for putative matches.\n", + " locations_2_to_use = np.array([\n", + " result2['locations'][i,]\n", + " for i in range(num_features_2)\n", + " if indices[i] != num_features_1\n", + " ])\n", + " locations_1_to_use = np.array([\n", + " result1['locations'][indices[i],]\n", + " for i in range(num_features_2)\n", + " if indices[i] != num_features_1\n", + " ])\n", + "\n", + " # Perform geometric verification using 
RANSAC.\n", + " _, inliers = ransac(\n", + " (locations_1_to_use, locations_2_to_use),\n", + " AffineTransform,\n", + " min_samples=3,\n", + " residual_threshold=20,\n", + " max_trials=1000)\n", + "\n", + " print('Found %d inliers' % sum(inliers))\n", + "\n", + " # Visualize correspondences.\n", + " _, ax = plt.subplots()\n", + " inlier_idxs = np.nonzero(inliers)[0]\n", + " plot_matched_features(\n", + " image1,\n", + " image2,\n", + " keypoints0=locations_1_to_use,\n", + " keypoints1=locations_2_to_use,\n", + " matches=np.column_stack((inlier_idxs, inlier_idxs)),\n", + " ax=ax,\n", + " )\n", + "\n", + " ax.axis('off')\n", + " ax.set_title('DELF correspondences')\n", + "\n", + " for line in ax.lines:\n", + " line.set_color('b')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tpEgqOvCYlPY" + }, + "outputs": [], + "source": [ + "match_images(image1, image2, result1, result2)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "RUymE2l9GZfO" + ], + "name": "tf_hub_delf_module.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_film_example.ipynb b/site/en/hub/tutorials/tf_hub_film_example.ipynb new file mode 100644 index 00000000000..83bcd4bd12c --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_film_example.ipynb @@ -0,0 +1,576 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "qNLUPuRpkFv_" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "DQcWZm0FkPk-" + }, + "outputs": [], + "source": [ + "#@title Copyright 2022 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Exbxve1rHlrF" + }, + "source": [ + "# Frame interpolation using the FILM model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jMWFVTlbrQ8m" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "61H28S7ArUAZ" + }, + "source": [ + "Frame interpolation is the task of synthesizing many in-between images from a given set of images. The technique is often used for frame rate upsampling or creating slow-motion video effects.\n", + "\n", + "In this colab, you will use the FILM model to do frame interpolation. The colab also provides code snippets to create videos from the interpolated in-between images.\n", + "\n", + "For more information on FILM research, you can read more here:\n", + "- Google AI Blog: [Large Motion Frame Interpolation](https://ai.googleblog.com/2022/10/large-motion-frame-interpolation.html)\n", + "- Project Page: FILM: [Frame Interpolation for Large Motion](https://film-net.github.io/)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dVX7s6zMulsu" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi5t2OEJsGBW" + }, + "outputs": [], + "source": [ + "!pip install mediapy\n", + "!sudo apt-get install -y ffmpeg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BA1tq39MjOiF" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import requests\n", + "import numpy as np\n", + "\n", + "from typing import Generator, Iterable, List, Optional\n", + "import mediapy as media" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GTgXmeYGnT7q" + }, + "source": [ + "## Load the model from TFHub\n", + "\n", + "To load a model from TensorFlow Hub you need the tfhub library and the model handle which is its documentation url." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GojhvyAtjUt0" + }, + "outputs": [], + "source": [ + "model = hub.load(\"https://tfhub.dev/google/film/1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DOQJPsu2CwPk" + }, + "source": [ + "## Util function to load images from a url or locally\n", + "\n", + "This function loads an image and make it ready to be used by the model later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BPnh5uhQvFln" + }, + "outputs": [], + "source": [ + "_UINT8_MAX_F = float(np.iinfo(np.uint8).max)\n", + "\n", + "def load_image(img_url: str):\n", + " \"\"\"Returns an image with shape [height, width, num_channels], with pixels in [0..1] range, and type np.float32.\"\"\"\n", + "\n", + " if (img_url.startswith(\"https\")):\n", + " user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}\n", + " response = requests.get(img_url, headers=user_agent)\n", + " image_data = response.content\n", + " else:\n", + " image_data = tf.io.read_file(img_url)\n", + "\n", + " image = tf.io.decode_image(image_data, channels=3)\n", + " image_numpy = tf.cast(image, dtype=tf.float32).numpy()\n", + " return image_numpy / _UINT8_MAX_F\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yjDFns1zp5y6" + }, + "source": [ + "FILM's model input is a dictionary with the keys `time`, `x0`, `x1`:\n", + "\n", + "- `time`: position of the interpolated frame. Midway is `0.5`.\n", + "- `x0`: is the initial frame.\n", + "- `x1`: is the final frame.\n", + "\n", + "Both frames need to be normalized (done in the function `load_image` above) where each pixel is in the range of `[0..1]`.\n", + "\n", + "`time` is a value between `[0..1]` and it says where the generated image should be. 
0.5 is midway between the input images.\n", + "\n", + "All three values need to have a batch dimension too." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VEQNQlHGsWSM" + }, + "outputs": [], + "source": [ + "# using images from the FILM repository (https://github.com/google-research/frame-interpolation/)\n", + "\n", + "image_1_url = \"https://github.com/google-research/frame-interpolation/blob/main/photos/one.png?raw=true\"\n", + "image_2_url = \"https://github.com/google-research/frame-interpolation/blob/main/photos/two.png?raw=true\"\n", + "\n", + "time = np.array([0.5], dtype=np.float32)\n", + "\n", + "image1 = load_image(image_1_url)\n", + "image2 = load_image(image_2_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r6_MQE9EuF_K" + }, + "outputs": [], + "source": [ + "input = {\n", + " 'time': np.expand_dims(time, axis=0), # adding the batch dimension to the time\n", + " 'x0': np.expand_dims(image1, axis=0), # adding the batch dimension to the image\n", + " 'x1': np.expand_dims(image2, axis=0) # adding the batch dimension to the image\n", + "}\n", + "mid_frame = model(input)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nZkzYE2bptfD" + }, + "source": [ + "The model outputs a couple of results but what you'll use here is the `image` key, whose value is the interpolated frame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eClVbNFhA5Py" + }, + "outputs": [], + "source": [ + "print(mid_frame.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rE2csH3u8ePe" + }, + "outputs": [], + "source": [ + "frames = [image1, mid_frame['image'][0].numpy(), image2]\n", + "\n", + "media.show_images(frames, titles=['input image one', 'generated image', 'input image two'], height=250)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fS1AT8kn-f_l" + }, + "source": [ + "Let's create a video from the generated frames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oFc53B3p37SH" + }, + "outputs": [], + "source": [ + "media.show_video(frames, fps=3, title='FILM interpolated video')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x5AOFNkj-lfO" + }, + "source": [ + "## Define a Frame Interpolator Library\n", + "\n", + "As you can see, the transition is not too smooth. \n", + "\n", + "To improve that you'll need many more interpolated frames.\n", + "\n", + "You could just keep running the model many times with intermediary images but there is a better solution.\n", + "\n", + "To generate many interpolated images and have a smoother video you'll create an interpolator library." 
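To see why a dedicated library helps, note how quickly the recursive midpoint scheme below grows the number of frames. The helper here is an illustration added for this write-up (it is not part of the FILM code, and `expected_frame_count` is a hypothetical name): each adjacent pair of input frames expands into `2**times_to_interpolate` frames, plus the final input frame.

```python
def expected_frame_count(num_input_frames: int, times_to_interpolate: int) -> int:
    # (pairs of adjacent inputs) * (frames generated per pair) + the final input frame.
    return (num_input_frames - 1) * 2**times_to_interpolate + 1

# With the two input images and times_to_interpolate = 6 used below,
# this gives 65 frames, a bit over two seconds of video at 30 fps.
print(expected_frame_count(2, 6))
```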
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsoDv_9geoZn" + }, + "outputs": [], + "source": [ + "\"\"\"A wrapper class for running a frame interpolation based on the FILM model on TFHub\n", + "\n", + "Usage:\n", + " interpolator = Interpolator()\n", + " result_batch = interpolator(image_batch_0, image_batch_1, batch_dt)\n", + " Where image_batch_1 and image_batch_2 are numpy tensors with TF standard\n", + " (B,H,W,C) layout, batch_dt is the sub-frame time in range [0..1], (B,) layout.\n", + "\"\"\"\n", + "\n", + "\n", + "def _pad_to_align(x, align):\n", + " \"\"\"Pads image batch x so width and height divide by align.\n", + "\n", + " Args:\n", + " x: Image batch to align.\n", + " align: Number to align to.\n", + "\n", + " Returns:\n", + " 1) An image padded so width % align == 0 and height % align == 0.\n", + " 2) A bounding box that can be fed readily to tf.image.crop_to_bounding_box\n", + " to undo the padding.\n", + " \"\"\"\n", + " # Input checking.\n", + " assert np.ndim(x) == 4\n", + " assert align > 0, 'align must be a positive number.'\n", + "\n", + " height, width = x.shape[-3:-1]\n", + " height_to_pad = (align - height % align) if height % align != 0 else 0\n", + " width_to_pad = (align - width % align) if width % align != 0 else 0\n", + "\n", + " bbox_to_pad = {\n", + " 'offset_height': height_to_pad // 2,\n", + " 'offset_width': width_to_pad // 2,\n", + " 'target_height': height + height_to_pad,\n", + " 'target_width': width + width_to_pad\n", + " }\n", + " padded_x = tf.image.pad_to_bounding_box(x, **bbox_to_pad)\n", + " bbox_to_crop = {\n", + " 'offset_height': height_to_pad // 2,\n", + " 'offset_width': width_to_pad // 2,\n", + " 'target_height': height,\n", + " 'target_width': width\n", + " }\n", + " return padded_x, bbox_to_crop\n", + "\n", + "\n", + "class Interpolator:\n", + " \"\"\"A class for generating interpolated frames between two input frames.\n", + "\n", + " Uses the Film model from TFHub\n", + " \"\"\"\n", + "\n", + " def __init__(self, align: int = 64) -> None:\n", + " \"\"\"Loads a saved model.\n", + "\n", + " Args:\n", + " align: 'If >1, pad the input size so it divides with this before\n", + " inference.'\n", + " \"\"\"\n", + " self._model = hub.load(\"https://tfhub.dev/google/film/1\")\n", + " self._align = align\n", + "\n", + " def __call__(self, x0: np.ndarray, x1: np.ndarray,\n", + " dt: np.ndarray) -> np.ndarray:\n", + " \"\"\"Generates an interpolated frame between given two batches of frames.\n", + "\n", + " All inputs should be np.float32 datatype.\n", + "\n", + " Args:\n", + " x0: First image batch. Dimensions: (batch_size, height, width, channels)\n", + " x1: Second image batch. Dimensions: (batch_size, height, width, channels)\n", + " dt: Sub-frame time. Range [0,1]. 
Dimensions: (batch_size,)\n", + "\n", + " Returns:\n", + " The result with dimensions (batch_size, height, width, channels).\n", + " \"\"\"\n", + " if self._align is not None:\n", + " x0, bbox_to_crop = _pad_to_align(x0, self._align)\n", + " x1, _ = _pad_to_align(x1, self._align)\n", + "\n", + " inputs = {'x0': x0, 'x1': x1, 'time': dt[..., np.newaxis]}\n", + " result = self._model(inputs, training=False)\n", + " image = result['image']\n", + "\n", + " if self._align is not None:\n", + " image = tf.image.crop_to_bounding_box(image, **bbox_to_crop)\n", + " return image.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZeGYaNBd_7a5" + }, + "source": [ + "## Frame and Video Generation Utility Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gOJxup6s_1DP" + }, + "outputs": [], + "source": [ + "def _recursive_generator(\n", + " frame1: np.ndarray, frame2: np.ndarray, num_recursions: int,\n", + " interpolator: Interpolator) -> Generator[np.ndarray, None, None]:\n", + " \"\"\"Splits halfway to repeatedly generate more frames.\n", + "\n", + " Args:\n", + " frame1: Input image 1.\n", + " frame2: Input image 2.\n", + " num_recursions: How many times to interpolate the consecutive image pairs.\n", + " interpolator: The frame interpolator instance.\n", + "\n", + " Yields:\n", + " The interpolated frames, including the first frame (frame1), but excluding\n", + " the final frame2.\n", + " \"\"\"\n", + " if num_recursions == 0:\n", + " yield frame1\n", + " else:\n", + " # Adds the batch dimension to all inputs before calling the interpolator,\n", + " # and remove it afterwards.\n", + " time = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)\n", + " mid_frame = interpolator(\n", + " np.expand_dims(frame1, axis=0), np.expand_dims(frame2, axis=0), time)[0]\n", + " yield from _recursive_generator(frame1, mid_frame, num_recursions - 1,\n", + " interpolator)\n", + " yield from _recursive_generator(mid_frame, frame2, num_recursions - 1,\n", + " interpolator)\n", + "\n", + "\n", + "def interpolate_recursively(\n", + " frames: List[np.ndarray], num_recursions: int,\n", + " interpolator: Interpolator) -> Iterable[np.ndarray]:\n", + " \"\"\"Generates interpolated frames by repeatedly interpolating the midpoint.\n", + "\n", + " Args:\n", + " frames: List of input frames. Expected shape (H, W, 3). 
The colors should be\n", + " in the range[0, 1] and in gamma space.\n", + " num_recursions: Number of times to do recursive midpoint\n", + " interpolation.\n", + " interpolator: The frame interpolation model to use.\n", + "\n", + " Yields:\n", + " The interpolated frames (including the inputs).\n", + " \"\"\"\n", + " n = len(frames)\n", + " for i in range(1, n):\n", + " yield from _recursive_generator(frames[i - 1], frames[i],\n", + " times_to_interpolate, interpolator)\n", + " # Separately yield the final frame.\n", + " yield frames[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X1R2KjhEAHu0" + }, + "outputs": [], + "source": [ + "times_to_interpolate = 6\n", + "interpolator = Interpolator()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZUo8tg1AYvZ" + }, + "source": [ + "## Running the Interpolator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QMMNjs7sAWTG" + }, + "outputs": [], + "source": [ + "input_frames = [image1, image2]\n", + "frames = list(\n", + " interpolate_recursively(input_frames, times_to_interpolate,\n", + " interpolator))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s9mHHyCAAhrM" + }, + "outputs": [], + "source": [ + "print(f'video with {len(frames)} frames')\n", + "media.show_video(frames, fps=30, title='FILM interpolated video')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_0AZKeMVFwAc" + }, + "source": [ + "For more information, you can visit [FILM's model repository](https://github.com/google-research/frame-interpolation).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8764ry3SGDks" + }, + "source": [ + "## Citation\n", + "\n", + "If you find this model and code useful in your works, please acknowledge it appropriately by citing:\n", + "\n", + "```\n", + "@inproceedings{reda2022film,\n", + " title = {FILM: Frame Interpolation for Large Motion},\n", + " author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},\n", + " booktitle = {The European Conference on Computer Vision (ECCV)},\n", + " year = {2022}\n", + "}\n", + "```\n", + "\n", + "```\n", + "@misc{film-tf,\n", + " title = {Tensorflow 2 Implementation of \"FILM: Frame Interpolation for Large Motion\"},\n", + " author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},\n", + " year = {2022},\n", + " publisher = {GitHub},\n", + " journal = {GitHub repository},\n", + " howpublished = {\\url{https://github.com/google-research/frame-interpolation}}\n", + "}\n", + "```" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "tf_hub_film_example.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb b/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb new file mode 100644 index 00000000000..4937bc2eb22 --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb @@ -0,0 +1,447 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "cellView": "both", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CxmDMK4yupqg" + }, + "source": [ + "# Generate Artificial Faces with CelebA Progressive GAN Model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sy553YSVmYiK" + }, + "source": [ + "This Colab demonstrates use of a TF Hub module based on a generative adversarial network (GAN). The module maps from N-dimensional vectors, called latent space, to RGB images.\n", + "\n", + "Two examples are provided:\n", + "* **Mapping** from latent space to images, and\n", + "* Given a target image, **using gradient descent to find** a latent vector that generates an image similar to the target image." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v4XGxDrCkeip" + }, + "source": [ + "## Optional prerequisites\n", + "\n", + "* Familiarity with [low level Tensorflow concepts](https://www.tensorflow.org/guide/eager).\n", + "* [Generative Adversarial Network](https://en.wikipedia.org/wiki/Generative_adversarial_network) on Wikipedia.\n", + "* Paper on Progressive GANs: [Progressive Growing of GANs for Improved Quality, Stability, and Variation](https://arxiv.org/abs/1710.10196)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HK3Q2vIaVw56" + }, + "source": [ + "### More models\n", + "[Here](https://tfhub.dev/s?module-type=image-generator) you can find all models currently hosted on [tfhub.dev](https://tfhub.dev/) that can generate images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KNM3kA0arrUu" + }, + "outputs": [], + "source": [ + "# Install imageio for creating animations. \n", + "!pip -q install imageio\n", + "!pip -q install scikit-image\n", + "!pip install git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "6cPY9Ou4sWs_" + }, + "outputs": [], + "source": [ + "#@title Imports and function definitions\n", + "from absl import logging\n", + "\n", + "import imageio\n", + "import PIL.Image\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "tf.random.set_seed(0)\n", + "\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "import time\n", + "\n", + "try:\n", + " from google.colab import files\n", + "except ImportError:\n", + " pass\n", + "\n", + "from IPython import display\n", + "from skimage import transform\n", + "\n", + "# We could retrieve this value from module.get_input_shapes() if we didn't know\n", + "# beforehand which module we will be using.\n", + "latent_dim = 512\n", + "\n", + "\n", + "# Interpolates between two vectors that are non-zero and don't both lie on a\n", + "# line going through origin. First normalizes v2 to have the same norm as v1. 
\n", + "# Then interpolates between the two vectors on the hypersphere.\n", + "def interpolate_hypersphere(v1, v2, num_steps):\n", + " v1_norm = tf.norm(v1)\n", + " v2_norm = tf.norm(v2)\n", + " v2_normalized = v2 * (v1_norm / v2_norm)\n", + "\n", + " vectors = []\n", + " for step in range(num_steps):\n", + " interpolated = v1 + (v2_normalized - v1) * step / (num_steps - 1)\n", + " interpolated_norm = tf.norm(interpolated)\n", + " interpolated_normalized = interpolated * (v1_norm / interpolated_norm)\n", + " vectors.append(interpolated_normalized)\n", + " return tf.stack(vectors)\n", + "\n", + "# Simple way to display an image.\n", + "def display_image(image):\n", + " image = tf.constant(image)\n", + " image = tf.image.convert_image_dtype(image, tf.uint8)\n", + " return PIL.Image.fromarray(image.numpy())\n", + "\n", + "# Given a set of images, show an animation.\n", + "def animate(images):\n", + " images = np.array(images)\n", + " converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)\n", + " imageio.mimsave('./animation.gif', converted_images)\n", + " return embed.embed_file('./animation.gif')\n", + "\n", + "logging.set_verbosity(logging.ERROR)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f5EESfBvukYI" + }, + "source": [ + "## Latent space interpolation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nJb9gFmRvynZ" + }, + "source": [ + "### Random vectors\n", + "\n", + "Latent space interpolation between two randomly initialized vectors. We will use a TF Hub module [progan-128](https://tfhub.dev/google/progan-128/1) that contains a pre-trained Progressive GAN." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8StEe9x9wGma" + }, + "outputs": [], + "source": [ + "progan = hub.load(\"https://tfhub.dev/google/progan-128/1\").signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fZ0O5_5Jhwio" + }, + "outputs": [], + "source": [ + "def interpolate_between_vectors():\n", + " v1 = tf.random.normal([latent_dim])\n", + " v2 = tf.random.normal([latent_dim])\n", + " \n", + " # Creates a tensor with 25 steps of interpolation between v1 and v2.\n", + " vectors = interpolate_hypersphere(v1, v2, 50)\n", + "\n", + " # Uses module to generate images from the latent space.\n", + " interpolated_images = progan(vectors)['default']\n", + "\n", + " return interpolated_images\n", + "\n", + "interpolated_images = interpolate_between_vectors()\n", + "animate(interpolated_images)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L9-uXoTHuXQC" + }, + "source": [ + "## Finding closest vector in latent space\n", + "Fix a target image. As an example use an image generated from the module or upload your own." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "phT4W66pMmko" + }, + "outputs": [], + "source": [ + "image_from_module_space = True # @param { isTemplate:true, type:\"boolean\" }\n", + "\n", + "def get_module_space_image():\n", + " vector = tf.random.normal([1, latent_dim])\n", + " images = progan(vector)['default'][0]\n", + " return images\n", + "\n", + "def upload_image():\n", + " uploaded = files.upload()\n", + " image = imageio.imread(uploaded[list(uploaded.keys())[0]])\n", + " return transform.resize(image, [128, 128])\n", + "\n", + "if image_from_module_space:\n", + " target_image = get_module_space_image()\n", + "else:\n", + " target_image = upload_image()\n", + "\n", + "display_image(target_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rBIt3Q4qvhuq" + }, + "source": [ + "After defining a loss function between the target image and the image generated by a latent space variable, we can use gradient descent to find variable values that minimize the loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cUGakLdbML2Q" + }, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "initial_vector = tf.random.normal([1, latent_dim])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u7MGzDE5MU20" + }, + "outputs": [], + "source": [ + "display_image(progan(initial_vector)['default'][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_4Z7tnyg-ZY" + }, + "outputs": [], + "source": [ + "def find_closest_latent_vector(initial_vector, num_optimization_steps,\n", + " steps_per_image):\n", + " images = []\n", + " losses = []\n", + "\n", + " vector = tf.Variable(initial_vector) \n", + " optimizer = tf.optimizers.Adam(learning_rate=0.01)\n", + " loss_fn = tf.losses.MeanAbsoluteError(reduction=\"sum\")\n", + "\n", + " for step in range(num_optimization_steps):\n", + " if (step % 100)==0:\n", + " print()\n", + " print('.', end='')\n", + " with tf.GradientTape() as tape:\n", + " image = progan(vector.read_value())['default'][0]\n", + " if (step % steps_per_image) == 0:\n", + " images.append(image.numpy())\n", + " target_image_difference = loss_fn(image, target_image[:,:,:3])\n", + " # The latent vectors were sampled from a normal distribution. 
We can get\n", + " # more realistic images if we regularize the length of the latent vector to \n", + " # the average length of vector from this distribution.\n", + " regularizer = tf.abs(tf.norm(vector) - np.sqrt(latent_dim))\n", + " \n", + " loss = target_image_difference + regularizer\n", + " losses.append(loss.numpy())\n", + " grads = tape.gradient(loss, [vector])\n", + " optimizer.apply_gradients(zip(grads, [vector]))\n", + " \n", + " return images, losses\n", + "\n", + "\n", + "num_optimization_steps=200\n", + "steps_per_image=5\n", + "images, loss = find_closest_latent_vector(initial_vector, num_optimization_steps, steps_per_image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pRbeF2oSAcOB" + }, + "outputs": [], + "source": [ + "plt.plot(loss)\n", + "plt.ylim([0,max(plt.ylim())])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KnZkDy2FEsTt" + }, + "outputs": [], + "source": [ + "animate(np.stack(images))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GGKfuCdfPQKH" + }, + "source": [ + "Compare the result to the target:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TK1P5z3bNuIl" + }, + "outputs": [], + "source": [ + "display_image(np.concatenate([images[-1], target_image], axis=1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tDt15dLsJwMy" + }, + "source": [ + "### Playing with the above example\n", + "If image is from the module space, the descent is quick and converges to a reasonable sample. Try out descending to an image that is **not from the module space**. The descent will only converge if the image is reasonably close to the space of training images.\n", + "\n", + "How to make it descend faster and to a more realistic image? One can try:\n", + "* using different loss on the image difference, e.g., quadratic,\n", + "* using different regularizer on the latent vector,\n", + "* initializing from a random vector in multiple runs,\n", + "* etc.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "N6ZDpd9XzFeN" + ], + "name": "tf_hub_generative_image_module.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tweening_conv3d.ipynb b/site/en/hub/tutorials/tweening_conv3d.ipynb new file mode 100644 index 00000000000..8c53929021f --- /dev/null +++ b/site/en/hub/tutorials/tweening_conv3d.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "wC0PtNm3Sa_T" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hgOqPjRKSa-7" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oKAkxAYuONU6" + }, + "source": [ + "# Video Inbetweening using 3D Convolutions\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cvMgkVIBpT-Y" + }, + "source": [ + "Yunpeng Li, Dominik Roblek, and Marco Tagliasacchi. From Here to There: Video Inbetweening Using Direct 3D Convolutions, 2019.\n", + "\n", + "https://arxiv.org/abs/1905.10240\n", + "\n", + "\n", + "Current Hub characteristics:\n", + "- has models for BAIR Robot pushing videos and KTH action video dataset (though this colab uses only BAIR)\n", + "- BAIR dataset already available in Hub. However, KTH videos need to be supplied by the users themselves.\n", + "- only evaluation (video generation) for now\n", + "- batch size and frame size are hard-coded\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EsQFWvxrYrHg" + }, + "source": [ + "Since `tfds.load('bair_robot_pushing_small', split='test')` would download a 30GB archive that also contains the training data, we download a separated archive that only contains the 190MB test data. The used dataset has been published by [this paper](https://arxiv.org/abs/1710.05268) and is licensed as Creative Commons BY 4.0." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GhIKakhc7JYL" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import tensorflow_hub as hub\n", + "import tensorflow_datasets as tfds\n", + "\n", + "from tensorflow_datasets.core import SplitGenerator\n", + "from tensorflow_datasets.video.bair_robot_pushing import BairRobotPushingSmall\n", + "\n", + "import tempfile\n", + "import pathlib\n", + "\n", + "TEST_DIR = pathlib.Path(tempfile.mkdtemp()) / \"bair_robot_pushing_small/softmotion30_44k/test/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zBMz14GmYkwz" + }, + "outputs": [], + "source": [ + "# Download the test split to $TEST_DIR\n", + "!mkdir -p $TEST_DIR\n", + "!wget -nv https://storage.googleapis.com/download.tensorflow.org/data/bair_test_traj_0_to_255.tfrecords -O $TEST_DIR/traj_0_to_255.tfrecords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "irRJ2Q0iYoW0" + }, + "outputs": [], + "source": [ + "# Since the dataset builder expects the train and test split to be downloaded,\n", + "# patch it so it only expects the test data to be available\n", + "builder = BairRobotPushingSmall()\n", + "test_generator = SplitGenerator(name='test', gen_kwargs={\"filedir\": str(TEST_DIR)})\n", + "builder._split_generators = lambda _: [test_generator]\n", + "builder.download_and_prepare()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iaGU8hhBPi_6" + }, + "source": [ + "## BAIR: Demo based on numpy array inputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "IgWmW8YzEiDo" + }, + "outputs": [], + "source": [ + "# @title Load some example data (BAIR).\n", + "batch_size = 16\n", + "\n", + "# If unable to download the dataset automatically due to \"not enough disk space\", please download manually to Google Drive and\n", + "# load using tf.data.TFRecordDataset.\n", + "ds = builder.as_dataset(split=\"test\")\n", + "test_videos = ds.batch(batch_size)\n", + "first_batch = next(iter(test_videos))\n", + "input_frames = first_batch['image_aux1'][:, ::15]\n", + "input_frames = tf.cast(input_frames, 
tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "96Jd5XefGHRr" + }, + "outputs": [], + "source": [ + "# @title Visualize loaded videos start and end frames.\n", + "\n", + "print('Test videos shape [batch_size, start/end frame, height, width, num_channels]: ', input_frames.shape)\n", + "sns.set_style('white')\n", + "plt.figure(figsize=(4, 2*batch_size))\n", + "\n", + "for i in range(batch_size)[:4]:\n", + " plt.subplot(batch_size, 2, 1 + 2*i)\n", + " plt.imshow(input_frames[i, 0] / 255.0)\n", + " plt.title('Video {}: First frame'.format(i))\n", + " plt.axis('off')\n", + " plt.subplot(batch_size, 2, 2 + 2*i)\n", + " plt.imshow(input_frames[i, 1] / 255.0)\n", + " plt.title('Video {}: Last frame'.format(i))\n", + " plt.axis('off')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w0FFhkikQABy" + }, + "source": [ + "### Load Hub Module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cLAUiWfEQAB5" + }, + "outputs": [], + "source": [ + "hub_handle = 'https://tfhub.dev/google/tweening_conv3d_bair/1'\n", + "module = hub.load(hub_handle).signatures['default']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVHTdXnhbGsK" + }, + "source": [ + "### Generate and show the videos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FHAwBW-zyegP" + }, + "outputs": [], + "source": [ + "filled_frames = module(input_frames)['default'] / 255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tVesWHTnSW1Z" + }, + "outputs": [], + "source": [ + "# Show sequences of generated video frames.\n", + "\n", + "# Concatenate start/end frames and the generated filled frames for the new videos.\n", + "generated_videos = np.concatenate([input_frames[:, :1] / 255.0, filled_frames, input_frames[:, 1:] / 255.0], axis=1)\n", + "\n", + "for video_id in range(4):\n", + " fig = plt.figure(figsize=(10 * 2, 2))\n", + " for frame_id in range(1, 16):\n", + " ax = fig.add_axes([frame_id * 1 / 16., 0, (frame_id + 1) * 1 / 16., 1],\n", + " xmargin=0, ymargin=0)\n", + " ax.imshow(generated_videos[video_id, frame_id])\n", + " ax.axis('off')" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "Q4DN769E2O_R" + ], + "name": "tweening_conv3d.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb b/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb new file mode 100644 index 00000000000..879bdbd0edb --- /dev/null +++ b/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb @@ -0,0 +1,984 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "yCs7P9JTMlzV" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Jqn-HYw-Mkea" + }, + "outputs": [], + "source": [ + "#@title Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stRetE8gMlmZ" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ndG8MjmJeicp" + }, + "source": [ + "# Fine-tuning Wav2Vec2 with an LM head\n", + "\n", + "In this notebook, we will load the pre-trained wav2vec2 model from [TFHub](https://tfhub.dev) and will fine-tune it on [LibriSpeech dataset](https://huggingface.co/datasets/librispeech_asr) by appending Language Modeling head (LM) over the top of our pre-trained model. The underlying task is to build a model for **Automatic Speech Recognition** i.e. given some speech, the model should be able to transcribe it into text." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWk8nL6Ui-_0" + }, + "source": [ + "## Setting Up\n", + "\n", + "Before running this notebook, please ensure that you are on GPU runtime (`Runtime` > `Change runtime type` > `GPU`). The following cell will install [`gsoc-wav2vec2`](https://github.com/vasudevgupta7/gsoc-wav2vec2) package & its dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "seqTlMyeZvM4" + }, + "outputs": [], + "source": [ + "!pip3 install -q git+https://github.com/vasudevgupta7/gsoc-wav2vec2@main\n", + "!sudo apt-get install -y libsndfile1-dev\n", + "!pip3 install -q SoundFile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvuJL8-f0zn5" + }, + "source": [ + "## Model setup using `TFHub`\n", + "\n", + "We will start by importing some libraries/modules." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M3_fgx4eZvM7" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from wav2vec2 import Wav2Vec2Config\n", + "\n", + "config = Wav2Vec2Config()\n", + "\n", + "print(\"TF version:\", tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y0rVUxyWsS5f" + }, + "source": [ + "First, we will download our model from TFHub & will wrap our model signature with [`hub.KerasLayer`](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) to be able to use this model like any other Keras layer. Fortunately, `hub.KerasLayer` can do both in just 1 line.\n", + "\n", + "**Note:** When loading model with `hub.KerasLayer`, model becomes a bit opaque but sometimes we need finer controls over the model, then we can load the model with `tf.keras.models.load_model(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NO6QRC7KZvM9" + }, + "outputs": [], + "source": [ + "pretrained_layer = hub.KerasLayer(\"https://tfhub.dev/vasudevgupta7/wav2vec2/1\", trainable=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pCputyVBv2e9" + }, + "source": [ + "You can refer to this [script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/export2hub.py) in case you are interested in the model exporting script. Object `pretrained_layer` is the freezed version of [`Wav2Vec2Model`](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/wav2vec2/modeling.py). 
These pre-trained weights were converted from the HuggingFace PyTorch [pre-trained weights](https://huggingface.co/facebook/wav2vec2-base) using [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/convert_torch_to_tf.py).\n", + "\n", + "Originally, wav2vec2 was pre-trained with a masked language modelling approach with the objective of identifying the true quantized latent speech representation for a masked time step. You can read more about the training objective in the paper: [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SseDnCr7hyhC" + }, + "source": [ + "Now, we will define a few constants and hyper-parameters which will be useful in the next few cells. `AUDIO_MAXLEN` is intentionally set to `246000` as the model signature only accepts a static sequence length of `246000`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eiILuMBERxlO" + }, + "outputs": [], + "source": [ + "AUDIO_MAXLEN = 246000\n", + "LABEL_MAXLEN = 256\n", + "BATCH_SIZE = 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1V4gTgGLgXvO" + }, + "source": [ + "In the following cell, we will wrap `pretrained_layer` & a dense layer (the LM head) with the [Keras Functional API](https://www.tensorflow.org/guide/keras/functional)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a3CUN1KEB10Q" + }, + "outputs": [], + "source": [ + "inputs = tf.keras.Input(shape=(AUDIO_MAXLEN,))\n", + "hidden_states = pretrained_layer(inputs)\n", + "outputs = tf.keras.layers.Dense(config.vocab_size)(hidden_states)\n", + "\n", + "model = tf.keras.Model(inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5zDXuoMXhDMo" + }, + "source": [ + "The dense layer (defined above) has an output dimension of `vocab_size` because we want to predict probabilities of each token in the vocabulary at each time step." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oPp18ZHRtnq-" + }, + "source": [ + "## Setting up training state" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ATQy1ZK3vFr7" + }, + "source": [ + "In TensorFlow, model weights are built only when `model.call` or `model.build` is called for the first time, so the following cell will build the model weights for us. Further, we will be running `model.summary()` to check the total number of trainable parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZgL5wyaXZvM-" + }, + "outputs": [], + "source": [ + "model(tf.random.uniform(shape=(BATCH_SIZE, AUDIO_MAXLEN)))\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EQxxA4Fevp7m" + }, + "source": [ + "Now, we need to define the `loss_fn` and optimizer to be able to train the model. The following cell will do that for us. We will be using the `Adam` optimizer for simplicity. `CTCLoss` is a common loss type that is used for tasks (like `ASR`) where input sub-parts can't be easily aligned with output sub-parts. You can read more about CTC loss in this amazing [blog post](https://distill.pub/2017/ctc/).\n", + "\n", + "\n", + "`CTCLoss` (from the [`gsoc-wav2vec2`](https://github.com/vasudevgupta7/gsoc-wav2vec2) package) accepts 3 arguments: `config`, `model_input_shape` & `division_factor`.
If `division_factor=1`, then loss will simply get summed, so pass `division_factor` accordingly to get mean over batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "glDepVEHZvM_" + }, + "outputs": [], + "source": [ + "from wav2vec2 import CTCLoss\n", + "\n", + "LEARNING_RATE = 5e-5\n", + "\n", + "loss_fn = CTCLoss(config, (BATCH_SIZE, AUDIO_MAXLEN), division_factor=BATCH_SIZE)\n", + "optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1mvTuOXpwsQe" + }, + "source": [ + "## Loading & Pre-processing data\n", + "\n", + "Let's now download the LibriSpeech dataset from the [official website](http://www.openslr.org/12) and set it up." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I4kIEC77cBCM" + }, + "outputs": [], + "source": [ + "!wget https://www.openslr.org/resources/12/dev-clean.tar.gz -P ./data/train/\n", + "!tar -xf ./data/train/dev-clean.tar.gz -C ./data/train/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LsQpmpn6jrMI" + }, + "source": [ + "**Note:** We are using `dev-clean` configuration as this notebook is just for demonstration purposes, so we need a small amount of data. Complete training data can be easily downloaded from [LibriSpeech website](http://www.openslr.org/12)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ynxAjtGHGFpM" + }, + "outputs": [], + "source": [ + "ls ./data/train/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yBMiORo0xJD0" + }, + "source": [ + "Our dataset lies in the LibriSpeech directory. Let's explore these files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jkIu_Wt4ZvNA" + }, + "outputs": [], + "source": [ + "data_dir = \"./data/train/LibriSpeech/dev-clean/2428/83705/\"\n", + "all_files = os.listdir(data_dir)\n", + "\n", + "flac_files = [f for f in all_files if f.endswith(\".flac\")]\n", + "txt_files = [f for f in all_files if f.endswith(\".txt\")]\n", + "\n", + "print(\"Transcription files:\", txt_files, \"\\nSound files:\", flac_files)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XEObi_Apk3ZD" + }, + "source": [ + "Alright, so each sub-directory has many `.flac` files and a `.txt` file. The `.txt` file contains text transcriptions for all the speech samples (i.e. `.flac` files) present in that sub-directory." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WYW6WKJflO2e" + }, + "source": [ + "We can load this text data as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cEBKxQblHPwq" + }, + "outputs": [], + "source": [ + "def read_txt_file(f):\n", + " with open(f, \"r\") as f:\n", + " samples = f.read().split(\"\\n\")\n", + " samples = {s.split()[0]: \" \".join(s.split()[1:]) for s in samples if len(s.split()) > 2}\n", + " return samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ldkf_ceb0_YW" + }, + "source": [ + "Similarly, we will define a function for loading a speech sample from a `.flac` file.\n", + "\n", + "`REQUIRED_SAMPLE_RATE` is set to `16000` as wav2vec2 was pre-trained with `16K` frequency and it's recommended to fine-tune it without any major change in data distribution due to frequency." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YOJ3OzPsTyXv" + }, + "outputs": [], + "source": [ + "import soundfile as sf\n", + "\n", + "REQUIRED_SAMPLE_RATE = 16000\n", + "\n", + "def read_flac_file(file_path):\n", + " with open(file_path, \"rb\") as f:\n", + " audio, sample_rate = sf.read(f)\n", + " if sample_rate != REQUIRED_SAMPLE_RATE:\n", + " raise ValueError(\n", + " f\"sample rate (={sample_rate}) of your files must be {REQUIRED_SAMPLE_RATE}\"\n", + " )\n", + " file_id = os.path.split(file_path)[-1][:-len(\".flac\")]\n", + " return {file_id: audio}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2sxDN8P4nWkW" + }, + "source": [ + "Now, we will pick some random samples & will try to visualize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HI5J-2Dfm_wT" + }, + "outputs": [], + "source": [ + "from IPython.display import Audio\n", + "import random\n", + "\n", + "file_id = random.choice([f[:-len(\".flac\")] for f in flac_files])\n", + "flac_file_path, txt_file_path = os.path.join(data_dir, f\"{file_id}.flac\"), os.path.join(data_dir, \"2428-83705.trans.txt\")\n", + "\n", + "print(\"Text Transcription:\", read_txt_file(txt_file_path)[file_id], \"\\nAudio:\")\n", + "Audio(filename=flac_file_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M8jJ7Ed81p_A" + }, + "source": [ + "Now, we will combine all the speech & text samples and will define the function (in next cell) for that purpose." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MI-5YCzaTsei" + }, + "outputs": [], + "source": [ + "def fetch_sound_text_mapping(data_dir):\n", + " all_files = os.listdir(data_dir)\n", + "\n", + " flac_files = [os.path.join(data_dir, f) for f in all_files if f.endswith(\".flac\")]\n", + " txt_files = [os.path.join(data_dir, f) for f in all_files if f.endswith(\".txt\")]\n", + "\n", + " txt_samples = {}\n", + " for f in txt_files:\n", + " txt_samples.update(read_txt_file(f))\n", + "\n", + " speech_samples = {}\n", + " for f in flac_files:\n", + " speech_samples.update(read_flac_file(f))\n", + "\n", + " assert len(txt_samples) == len(speech_samples)\n", + "\n", + " samples = [(speech_samples[file_id], txt_samples[file_id]) for file_id in speech_samples.keys() if len(speech_samples[file_id]) < AUDIO_MAXLEN]\n", + " return samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mx95Lxvu0nT4" + }, + "source": [ + "It's time to have a look at a few samples ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_Ls7X_jqIz4R" + }, + "outputs": [], + "source": [ + "samples = fetch_sound_text_mapping(data_dir)\n", + "samples[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TUjhSWfsnlCL" + }, + "source": [ + "Note: We are loading this data into memory as we working with a small amount of dataset in this notebook. But for training on the complete dataset (~300 GBs), you will have to load data lazily. You can refer to [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/data_utils.py) to know more on that." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xg8Zia1kzw0J" + }, + "source": [ + "Let's pre-process the data now !!!\n", + "\n", + "We will first define the tokenizer & processor using `gsoc-wav2vec2` package. Then, we will do very simple pre-processing. 
`processor` will normalize raw speech w.r.t. the frames axis and `tokenizer` will convert our model outputs into strings (using the defined vocabulary) & will take care of removing special tokens (depending on your tokenizer configuration)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gaat_hMLNVHF" + }, + "outputs": [], + "source": [ + "from wav2vec2 import Wav2Vec2Processor\n", + "tokenizer = Wav2Vec2Processor(is_tokenizer=True)\n", + "processor = Wav2Vec2Processor(is_tokenizer=False)\n", + "\n", + "def preprocess_text(text):\n", + " label = tokenizer(text)\n", + " return tf.constant(label, dtype=tf.int32)\n", + "\n", + "def preprocess_speech(audio):\n", + " audio = tf.constant(audio, dtype=tf.float32)\n", + " return processor(tf.transpose(audio))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GyKl8QP-zRFC" + }, + "source": [ + "Now, we will define a Python generator to call the preprocessing functions we defined in the cells above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PoQrRalwMpQ6" + }, + "outputs": [], + "source": [ + "def inputs_generator():\n", + " for speech, text in samples:\n", + " yield preprocess_speech(speech), preprocess_text(text)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Vlm3ySFULsG" + }, + "source": [ + "## Setting up `tf.data.Dataset`\n", + "\n", + "The following cell will set up the `tf.data.Dataset` object using its `.from_generator(...)` method. We will be using the `generator` object we defined in the above cell.\n", + "\n", + "**Note:** For distributed training (especially on TPUs), `.from_generator(...)` doesn't work currently and it is recommended to train on data stored in `.tfrecord` format (Note: The TFRecords should ideally be stored inside a GCS Bucket in order for the TPUs to work to the fullest extent).\n", + "\n", + "You can refer to [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/make_tfrecords.py) for more details on how to convert LibriSpeech data into tfrecords." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbQ_dMwGO62h" + }, + "outputs": [], + "source": [ + "output_signature = (\n", + " tf.TensorSpec(shape=(None), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None), dtype=tf.int32),\n", + ")\n", + "\n", + "dataset = tf.data.Dataset.from_generator(inputs_generator, output_signature=output_signature)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HXBbNsRyPyw3" + }, + "outputs": [], + "source": [ + "BUFFER_SIZE = len(flac_files)\n", + "SEED = 42\n", + "\n", + "dataset = dataset.shuffle(BUFFER_SIZE, seed=SEED)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9DAUmns3pXfr" + }, + "source": [ + "We will split the dataset into multiple batches, so let's prepare the batches in the following cell. All the sequences in a batch should be padded to a constant length. We will use the `.padded_batch(...)` method for that purpose."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Okhko1IWRida" + }, + "outputs": [], + "source": [ + "dataset = dataset.padded_batch(BATCH_SIZE, padded_shapes=(AUDIO_MAXLEN, LABEL_MAXLEN), padding_values=(0.0, 0))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A45CjQG5qSbV" + }, + "source": [ + "Accelerators (like GPUs/TPUs) are very fast and often data-loading (& pre-processing) becomes the bottleneck during training as the data-loading part happens on CPUs. This can increase the training time significantly especially when there is a lot of online pre-processing involved or data is streamed online from GCS buckets. To handle those issues, `tf.data.Dataset` offers the `.prefetch(...)` method. This method helps in preparing the next few batches in parallel (on CPUs) while the model is making predictions (on GPUs/TPUs) on the current batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f-bKu2YjRior" + }, + "outputs": [], + "source": [ + "dataset = dataset.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lqk2cs6LxVIh" + }, + "source": [ + "Since this notebook is made for demonstration purposes, we will be taking first `num_train_batches` and will perform training over only that. You are encouraged to train on the whole dataset though. Similarly, we will evaluate only `num_val_batches`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z6GO5oYUxXtz" + }, + "outputs": [], + "source": [ + "num_train_batches = 10\n", + "num_val_batches = 4\n", + "\n", + "train_dataset = dataset.take(num_train_batches)\n", + "val_dataset = dataset.skip(num_train_batches).take(num_val_batches)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CzAOI78tky08" + }, + "source": [ + "## Model training\n", + "\n", + "For training our model, we will be directly calling `.fit(...)` method after compiling our model with `.compile(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vuBY2sZElgwg" + }, + "outputs": [], + "source": [ + "model.compile(optimizer, loss=loss_fn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qswxafSl0HjO" + }, + "source": [ + "The above cell will set up our training state. Now we can initiate training with the `.fit(...)` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vtuSfnj1l-I_" + }, + "outputs": [], + "source": [ + "history = model.fit(train_dataset, validation_data=val_dataset, epochs=3)\n", + "history.history" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ySvp8r2E1q_V" + }, + "source": [ + "Let's save our model with `.save(...)` method to be able to perform inference later. You can also export this SavedModel to TFHub by following [TFHub documentation](https://www.tensorflow.org/hub/publish)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C0KEYcwydwjF" + }, + "outputs": [], + "source": [ + "save_dir = \"finetuned-wav2vec2\"\n", + "model.save(save_dir, include_optimizer=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MkOpp9rZ211t" + }, + "source": [ + "Note: We are setting `include_optimizer=False` as we want to use this model for inference only." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SJfPlTgezD0i" + }, + "source": [ + "## Evaluation\n", + "\n", + "Now we will be computing Word Error Rate over the validation dataset\n", + "\n", + "**Word error rate** (WER) is a common metric for measuring the performance of an automatic speech recognition system. The WER is derived from the Levenshtein distance, working at the word level. Word error rate can then be computed as: WER = (S + D + I) / N = (S + D + I) / (S + D + C) where S is the number of substitutions, D is the number of deletions, I is the number of insertions, C is the number of correct words, N is the number of words in the reference (N=S+D+C). This value indicates the percentage of words that were incorrectly predicted. \n", + "\n", + "You can refer to [this paper](https://www.isca-speech.org/archive_v0/interspeech_2004/i04_2765.html) to learn more about WER." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Io_91Y7-r3xu" + }, + "source": [ + "We will use `load_metric(...)` function from [HuggingFace datasets](https://huggingface.co/docs/datasets/) library. Let's first install the `datasets` library using `pip` and then define the `metric` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GW9F_oVDU1TZ" + }, + "outputs": [], + "source": [ + "!pip3 install -q datasets\n", + "\n", + "from datasets import load_metric\n", + "metric = load_metric(\"wer\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssWXWc7CZvNB" + }, + "outputs": [], + "source": [ + "@tf.function(jit_compile=True)\n", + "def eval_fwd(batch):\n", + " logits = model(batch, training=False)\n", + " return tf.argmax(logits, axis=-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NFh1myg1x4ua" + }, + "source": [ + "It's time to run the evaluation on validation data now." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EQTFVjZghckJ" + }, + "outputs": [], + "source": [ + "from tqdm.auto import tqdm\n", + "\n", + "for speech, labels in tqdm(val_dataset, total=num_val_batches):\n", + " predictions = eval_fwd(speech)\n", + " predictions = [tokenizer.decode(pred) for pred in predictions.numpy().tolist()]\n", + " references = [tokenizer.decode(label, group_tokens=False) for label in labels.numpy().tolist()]\n", + " metric.add_batch(references=references, predictions=predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WWCc8qBesv3e" + }, + "source": [ + "We are using the `tokenizer.decode(...)` method for decoding our predictions and labels back into the text and will add them to the metric for `WER` computation later." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XI_URj8Wtb2g" + }, + "source": [ + "Now, let's calculate the metric value in following cell:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a83wekLgWMod" + }, + "outputs": [], + "source": [ + "metric.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c_cD1OgVEjl4" + }, + "source": [ + "**Note:** Here metric value doesn't make any sense as the model is trained on very small data and ASR-like tasks often require a large amount of data to learn a mapping from speech to text. You should probably train on large data to get some good results. This notebook gives you a template to fine-tune a pre-trained speech model." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G14o706kdTE1" + }, + "source": [ + "## Inference\n", + "\n", + "Now that we are satisfied with the training process & have saved the model in `save_dir`, we will see how this model can be used for inference.\n", + "\n", + "First, we will load our model using `tf.keras.models.load_model(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wrTrExiUdaED" + }, + "outputs": [], + "source": [ + "finetuned_model = tf.keras.models.load_model(save_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luodSroz20SR" + }, + "source": [ + "Let's download some speech samples for performing inference. You can replace the following sample with your speech sample also." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HUE0shded6Ej" + }, + "outputs": [], + "source": [ + "!wget https://github.com/vasudevgupta7/gsoc-wav2vec2/raw/main/data/SA2.wav" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ycBjU_U53FjL" + }, + "source": [ + "Now, we will read the speech sample using `soundfile.read(...)` and pad it to `AUDIO_MAXLEN` to satisfy the model signature. Then we will normalize that speech sample using the `Wav2Vec2Processor` instance & will feed it into the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z7CARje4d5_H" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "speech, _ = sf.read(\"SA2.wav\")\n", + "speech = np.pad(speech, (0, AUDIO_MAXLEN - len(speech)))\n", + "speech = tf.expand_dims(processor(tf.constant(speech)), 0)\n", + "\n", + "outputs = finetuned_model(speech)\n", + "outputs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lUSttSPa30qP" + }, + "source": [ + "Let's decode numbers back into text sequence using the `Wav2Vec2tokenizer` instance, we defined above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RYdJqxQ4llgI" + }, + "outputs": [], + "source": [ + "predictions = tf.argmax(outputs, axis=-1)\n", + "predictions = [tokenizer.decode(pred) for pred in predictions.numpy().tolist()]\n", + "predictions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7DXC757bztJc" + }, + "source": [ + "This prediction is quite random as the model was never trained on large data in this notebook (as this notebook is not meant for doing complete training). You will get good predictions if you train this model on complete LibriSpeech dataset.\n", + "\n", + "Finally, we have reached an end to this notebook. But it's not an end of learning TensorFlow for speech-related tasks, this [repository](https://github.com/tulasiram58827/TTS_TFLite) contains some more amazing tutorials. In case you encountered any bug in this notebook, please create an issue [here](https://github.com/vasudevgupta7/gsoc-wav2vec2/issues)." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "rWk8nL6Ui-_0", + "wvuJL8-f0zn5", + "oPp18ZHRtnq-", + "1mvTuOXpwsQe", + "7Vlm3ySFULsG", + "CzAOI78tky08", + "SJfPlTgezD0i", + "G14o706kdTE1" + ], + "name": "wav2vec2_saved_model_finetuning.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/wiki40b_lm.ipynb b/site/en/hub/tutorials/wiki40b_lm.ipynb new file mode 100644 index 00000000000..ad94ce0aab8 --- /dev/null +++ b/site/en/hub/tutorials/wiki40b_lm.ipynb @@ -0,0 +1,451 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Oxb_tjw13y4G" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EAkh2aBJLg6q" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owAopeOtirc9" + }, + "source": [ + "# Wiki40B Language Models\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T-nCyGRri-KO" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8eY9jkGpjf3d" + }, + "source": [ + "Generate Wikipedia-like text using the **Wiki40B language models** from [TensorFlow Hub](https://tfhub.dev)!\n", + "\n", + "This notebook illustrates how to:\n", + "* Load the 41 monolingual and 2 multilingual language models that are part of the [Wiki40b-LM collection](https://tfhub.dev/google/collections/wiki40b-lm/1) on TF-Hub\n", + "* Use the models to obtain perplexity, per layer activations, and word embeddings for a given piece of text\n", + "* Generate text token-by-token from a piece of seed text\n", + "\n", + "The language models are trained on the newly published, cleaned-up [Wiki40B dataset](https://www.tensorflow.org/datasets/catalog/wiki40b) available on TensorFlow Datasets. The training setup is based on the paper [“Wiki-40B: Multilingual Language Model Dataset”](https://research.google/pubs/pub49029/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wK2YnrEhLjDf" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "sv2CmI7BdaML" + }, + "outputs": [], + "source": [ + "#@title Installing Dependencies\n", + "!pip install --quiet \"tensorflow-text==2.11.*\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "8uSkaQ-Vdon2" + }, + "outputs": [], + "source": [ + "#@title Imports\n", + "import numpy as np\n", + "import tensorflow.compat.v1 as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text as tf_text\n", + "\n", + "tf.disable_eager_execution()\n", + "tf.logging.set_verbosity(tf.logging.WARN)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d2MvP-cyL-BN" + }, + "source": [ + "## Choose Language\n", + "\n", + "Let's choose **which language model** to load from TF-Hub and the **length of text** to be generated. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "33zYlSXwMA_o" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "language = \"en\" #@param [\"en\", \"ar\", \"zh-cn\", \"zh-tw\", \"nl\", \"fr\", \"de\", \"it\", \"ja\", \"ko\", \"pl\", \"pt\", \"ru\", \"es\", \"th\", \"tr\", \"bg\", \"ca\", \"cs\", \"da\", \"el\", \"et\", \"fa\", \"fi\", \"he\", \"hi\", \"hr\", \"hu\", \"id\", \"lt\", \"lv\", \"ms\", \"no\", \"ro\", \"sk\", \"sl\", \"sr\", \"sv\", \"tl\", \"uk\", \"vi\", \"multilingual-64k\", \"multilingual-128k\"]\n", + "hub_module = \"https://tfhub.dev/google/wiki40b-lm-{}/1\".format(language)\n", + "max_gen_len = 20 #@param\n", + "\n", + "print(\"Using the {} model to generate sequences of max length {}.\".format(hub_module, max_gen_len))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dgw2qW4xZbMj" + }, + "source": [ + "## Build the Model\n", + "\n", + "Okay, now that we've configured which pre-trained model to use, let's configure it to generate text up to `max_gen_len`. We will need to load the language model from TF-Hub, feed in a piece of starter text, and then iteratively feed in tokens as they are generated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pUypKuc3Mlpa" + }, + "outputs": [], + "source": [ + "#@title Load the language model pieces\n", + "g = tf.Graph()\n", + "n_layer = 12\n", + "model_dim = 768\n", + "\n", + "with g.as_default():\n", + " text = tf.placeholder(dtype=tf.string, shape=(1,))\n", + "\n", + " # Load the pretrained model from TF-Hub\n", + " module = hub.Module(hub_module)\n", + "\n", + " # Get the word embeddings, activations at each layer, negative log likelihood\n", + " # of the text, and calculate the perplexity.\n", + " embeddings = module(dict(text=text), signature=\"word_embeddings\", as_dict=True)[\"word_embeddings\"]\n", + " activations = module(dict(text=text), signature=\"activations\", as_dict=True)[\"activations\"]\n", + " neg_log_likelihood = module(dict(text=text), signature=\"neg_log_likelihood\", as_dict=True)[\"neg_log_likelihood\"]\n", + " ppl = tf.exp(tf.reduce_mean(neg_log_likelihood, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "ZOS2Z2n0MsuC" + }, + "outputs": [], + "source": [ + "#@title Construct the per-token generation graph\n", + "def feedforward_step(module, inputs, mems):\n", + " \"\"\"Generate one step.\"\"\"\n", + " # Set up the input dict for one step of generation\n", + " inputs = tf.dtypes.cast(inputs, tf.int64)\n", + " generation_input_dict = dict(input_tokens=inputs)\n", + " mems_dict = {\"mem_{}\".format(i): mems[i] for i in range(n_layer)}\n", + " generation_input_dict.update(mems_dict)\n", + "\n", + " # Generate the tokens from the language model\n", + " generation_outputs = module(generation_input_dict, signature=\"prediction\", as_dict=True)\n", + "\n", + " # Get the probabilities and the inputs for the next steps\n", + " probs = generation_outputs[\"probs\"]\n", + " new_mems = [generation_outputs[\"new_mem_{}\".format(i)] for i in range(n_layer)]\n", + "\n", + " return probs, new_mems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "S9ss6amQMyVY" + }, + "outputs": [], + "source": [ + "#@title Build the statically unrolled graph for `max_gen_len` tokens\n", + "with g.as_default():\n", + " # Tokenization with the sentencepiece model.\n", + " token_ids = module(dict(text=text), signature=\"tokenization\", as_dict=True)[\"token_ids\"]\n", + " inputs_np = token_ids\n", + " # Generate text by statically unrolling the computational graph\n", + " mems_np = [np.zeros([1, 0, model_dim], dtype=np.float32) for _ in range(n_layer)]\n", + "\n", + " # Generate up to `max_gen_len` tokens\n", + " sampled_ids = []\n", + " for step in range(max_gen_len):\n", + " probs, mems_np = feedforward_step(module, inputs_np, mems_np)\n", + " sampled_id = tf.random.categorical(tf.math.log(probs[0]), num_samples=1, dtype=tf.int32)\n", + " sampled_id = tf.squeeze(sampled_id)\n", + " sampled_ids.append(sampled_id)\n", + " inputs_np = tf.reshape(sampled_id, [1, 1])\n", + "\n", + " # Transform the ids into text\n", + " sampled_ids = tf.expand_dims(sampled_ids, axis=0)\n", + " generated_text = module(dict(token_ids=sampled_ids), signature=\"detokenization\", as_dict=True)[\"text\"]\n", + "\n", + " init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K5SYcRrxM7vS" + }, + "source": [ + "## Generate some text\n", + "\n", + "Let's generate some text! 
We'll set a text `seed` to prompt the language model.\n", + "\n", + "You can use one of the **predefined** seeds or _optionally_ **enter your own**. This text will be used as seed for the language model to help prompt the language model for what to generate next.\n", + "\n", + "You can use the following special tokens precede special parts of the generated article. Use **`_START_ARTICLE_`** to indicate the beginning of the article, **`_START_SECTION_`** to indicate the beginning of a section, and **`_START_PARAGRAPH_`** to generate text in the article\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GmZxv7bzMIcL" + }, + "outputs": [], + "source": [ + "#@title Predefined Seeds\n", + "lang_to_seed = {\"en\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\",\n", + " \"ar\": \"\\n_START_ARTICLE_\\nأوليفيا كوك\\n_START_SECTION_\\nنشأتها والتعلي \\n_START_PARAGRAPH_\\nولدت أوليفيا كوك في أولدهام في مانشستر الكبرى لأسرة تتكون من أب يعمل كظابط شرطة، وأمها تعمل كممثلة مبيعات. عندما كانت صغيرة بدأت تأخذ دروساً في الباليه الجمباز. وفي المدرسة شاركت في المسرحيات المدرسية، إضافةً إلى عملها في مسرح سندريلا . وفي سن الرابعة عشر عاماً، حصلت على وكيلة لها في مانشستر وهي وقعت عقداً مع وكالة الفنانين المبدعين في مانشستر،\",\n", + " \"zh-cn\": \"\\n_START_ARTICLE_\\n上尾事件\\n_START_SECTION_\\n日本国铁劳资关系恶化\\n_START_PARAGRAPH_\\n由于日本国铁财政恶化,管理层开始重整人手安排,令工会及员工感到受威胁。但日本国铁作为公营企业,其雇员均受公营企业等劳资关系法规管——该法第17条规定公营企业员工不得发动任何罢工行为。为了规避该法例\",\n", + " \"zh-tw\": \"\\n_START_ARTICLE_\\n乌森\\n_START_PARAGRAPH_\\n烏森(法語:Houssen,發音:[usən];德語:Hausen;阿爾薩斯語:Hüse)是法國上萊茵省的一個市鎮,位於該省北部,屬於科爾馬-里博維萊區(Colmar-Ribeauvillé)第二科爾馬縣(Colmar-2)。該市鎮總面積6.7平方公里,2009年時的人口為\",\n", + " \"nl\": \"\\n_START_ARTICLE_\\n1001 vrouwen uit de Nederlandse geschiedenis\\n_START_SECTION_\\nSelectie van vrouwen\\n_START_PARAGRAPH_\\nDe 'oudste' biografie in het boek is gewijd aan de beschermheilige\",\n", + " \"fr\": \"\\n_START_ARTICLE_\\nꝹ\\n_START_SECTION_\\nUtilisation\\n_START_PARAGRAPH_\\nLe d insulaire est utilisé comme lettre additionnelle dans l’édition de 1941 du recueil de chroniques galloises Brut y Tywysogion\",\n", + " \"de\": \"\\n_START_ARTICLE_\\nÜnal Demirkıran\\n_START_SECTION_\\nLaufbahn\\n_START_PARAGRAPH_\\nDemirkıran debütierte als junges Talent am 25. September 1999 im Auswärtsspiel des SSV Ulm 1846 bei Werder Bremen (2:2) in der Bundesliga, als er kurz\",\n", + " \"it\": \"\\n_START_ARTICLE_\\n28th Street (linea IRT Lexington Avenue)\\n_START_SECTION_\\nStoria\\n_START_PARAGRAPH_\\nLa stazione, i cui lavori di costruzione ebbero inizio nel 1900, venne aperta il 27 ottobre 1904, come\",\n", + " \"ja\": \"\\n_START_ARTICLE_\\nしのぶ・まさみshow'05 恋してラララ\\n_START_SECTION_\\n概要\\n_START_PARAGRAPH_\\n『上海ルーキーSHOW』の打ち切り後に放送された年末特番で、同番組MCの大竹しのぶと久本雅美が恋愛にまつわるテーマでトークや音楽企画を展開していた。基本は女\",\n", + " \"ko\": \"\\n_START_ARTICLE_\\n녹턴, Op. 9 (쇼팽)\\n_START_SECTION_\\n녹턴 3번 나장조\\n_START_PARAGRAPH_\\n쇼팽의 녹턴 3번은 세도막 형식인 (A-B-A)형식을 취하고 있다. 첫 부분은 알레그레토(Allegretto)의 빠르기가 지시되어 있으며 물 흐르듯이 부드럽게 전개되나\",\n", + " \"pl\": \"\\n_START_ARTICLE_\\nAK-176\\n_START_SECTION_\\nHistoria\\n_START_PARAGRAPH_\\nPod koniec lat 60 XX w. 
w ZSRR dostrzeżono potrzebę posiadania lekkiej armaty uniwersalnej średniego kalibru o stosunkowo dużej mocy ogniowej, która\",\n", + " \"pt\": \"\\n_START_ARTICLE_\\nÁcido ribonucleico\\n_START_SECTION_\\nIntermediário da transferência de informação\\n_START_PARAGRAPH_\\nEm 1957 Elliot Volkin e Lawrence Astrachan fizeram uma observação significativa. Eles descobriram que uma das mais marcantes mudanças\",\n", + " \"ru\": \"\\n_START_ARTICLE_\\nАрнольд, Ремо\\n_START_SECTION_\\nКлубная карьера\\n_START_PARAGRAPH_\\nАрнольд перешёл в академию «Люцерна» в 12 лет. С 2014 года выступал за вторую команду, где провёл пятнадцать встреч. С сезона 2015/2016 находится в составе основной команды. 27 сентября 2015 года дебютировал\",\n", + " \"es\": \"\\n_START_ARTICLE_\\n(200012) 2007 LK20\\n_START_SECTION_\\nDesignación y nombre\\n_START_PARAGRAPH_\\nDesignado provisionalmente como 2007 LK20.\\n_START_SECTION_\\nCaracterísticas orbitales\\n_START_PARAGRAPH_\\n2007 LK20\",\n", + " \"th\": \"\\n_START_ARTICLE_\\nการนัดหยุดเรียนเพื่อภูมิอากาศ\\n_START_SECTION_\\nเกรียตา ทืนแบร์ย\\n_START_PARAGRAPH_\\nวันที่ 20 สิงหาคม 2561 เกรียตา ทืนแบร์ย นักกิจกรรมภูมิอากาศชาวสวีเดน ซึ่งขณะนั้นศึกษาอยู่ในชั้นเกรด 9 (เทียบเท่ามัธยมศึกษาปีที่ 3) ตัดสินใจไม่เข้าเรียนจนกระทั่งการเลือกตั้งทั่วไปในประเทศสวีเดนปี\",\n", + " \"tr\": \"\\n_START_ARTICLE_\\nİsrail'in Muhafazakar Dostları\\n_START_SECTION_\\nFaaliyetleri\\n_START_PARAGRAPH_\\nGrubun 2005 stratejisi ile aşağıdaki faaliyet alanları tespit edilmiştir:_NEWLINE_İsrail'i destekleme\",\n", + " \"bg\": \"\\n_START_ARTICLE_\\nАвтомобил с повишена проходимост\\n_START_SECTION_\\nОсобености на конструкцията\\n_START_PARAGRAPH_\\nВ исторически план леки автомобили с висока проходимост се произвеждат и имат военно\",\n", + " \"ca\": \"\\n_START_ARTICLE_\\nAuchy-la-Montagne\\n_START_SECTION_\\nPoblació\\n_START_PARAGRAPH_\\nEl 2007 la població de fet d'Auchy-la-Montagne era de 469 persones. Hi havia 160 famílies de les quals 28\",\n", + " \"cs\": \"\\n_START_ARTICLE_\\nŘemeslo\\n_START_PARAGRAPH_\\nŘemeslo je určitý druh manuální dovednosti, provozovaný za účelem obživy, resp. vytváření zisku. Pro řemeslné práce je charakteristický vysoký podíl ruční práce, spojený s používáním specializovaných nástrojů a pomůcek. Řemeslné práce\",\n", + " \"da\": \"\\n_START_ARTICLE_\\nÖrenäs slot\\n_START_PARAGRAPH_\\nÖrenäs slot (svensk: Örenäs slott) er et slot nær Glumslöv i Landskrona stad tæt på Øresunds-kysten i Skåne i Sverige._NEWLINE_Örenäs ligger\",\n", + " \"el\": \"\\n_START_ARTICLE_\\nΆλβαρο Ρεκόμπα\\n_START_SECTION_\\nΒιογραφικά στοιχεία\\n_START_PARAGRAPH_\\nΟ Άλβαρο Ρεκόμπα γεννήθηκε στις 17 Μαρτίου 1976 στο Μοντεβίδεο της Ουρουγουάης από\",\n", + " \"et\": \"\\n_START_ARTICLE_\\nAus deutscher Geistesarbeit\\n_START_PARAGRAPH_\\nAus deutscher Geistesarbeit (alapealkiri Wochenblatt für wissenschaftliche und kulturelle Fragen der Gegenwart) oli ajakiri, mis 1924–1934 ilmus Tallinnas. Ajakirja andis 1932–1934\",\n", + " \"fa\": \"\\n_START_ARTICLE_\\nتفسیر بغوی\\n_START_PARAGRAPH_\\nایرانی حسین بن مسعود بغوی است. این کتاب خلاصه ای از تفسیر الکشف و البیان عن تفسیر القرآن ابواسحاق احمد ثعلبی می‌باشد. این کتاب در ۴ جلد موجود می‌باش\",\n", + " \"fi\": \"\\n_START_ARTICLE_\\nBovesin verilöyly\\n_START_SECTION_\\nVerilöyly\\n_START_PARAGRAPH_\\n19. syyskuuta 1943 partisaaniryhmä saapui Bovesiin tarkoituksenaan ostaa leipää kylästä. 
Kylässä sattui olemaan kaksi SS-miestä, jotka\",\n", + " \"he\": \"\\n_START_ARTICLE_\\nאוגדה 85\\n_START_SECTION_\\nהיסטוריה\\n_START_PARAGRAPH_\\nהאוגדה הוקמה בהתחלה כמשלט העמקים בשנות השבעים. בשנות השמונים הפכה להיות אוגדה מרחבית עם שתי\",\n", + " \"hi\": \"\\n_START_ARTICLE_\\nऑडी\\n_START_SECTION_\\nऑडी इंडिया\\n_START_PARAGRAPH_\\nऑडी इंडिया की स्थापना मार्च 2007 में फोक्सवैगन ग्रुप सेल्स इंडिया के एक विभाजन के रूप में की गई थी। दुनिया भर में 110\",\n", + " \"hr\": \"\\n_START_ARTICLE_\\nČimariko (jezična porodica)\\n_START_PARAGRAPH_\\nChimarikan.-porodica sjevernoameričkih indijanskih jezika koja prema Powersu obuhvaća jezike Indijanaca Chimariko (Chemaŕeko) sa rijeke Trinity i Chimalakwe\",\n", + " \"hu\": \"\\n_START_ARTICLE_\\nÁllami Politikai Igazgatóság\\n_START_PARAGRAPH_\\nAz Állami Politikai Igazgatóság (rövidítve: GPU, oroszul: Государственное политическое управление), majd később Egyesített Állami Politikai Igazgatóság Szovjet-Oroszország\",\n", + " \"id\": \"\\n_START_ARTICLE_\\n(257195) 2008 QY41\\n_START_SECTION_\\nPembentukan\\n_START_PARAGRAPH_\\nSeperti asteroid secara keseluruhan, asteroid ini terbentuk dari nebula matahari primordial sebagai pecahan planetisimal, sesuatu di\",\n", + " \"lt\": \"\\n_START_ARTICLE_\\nŠavijos–Uardigo regionas\\n_START_SECTION_\\nGeografija\\n_START_PARAGRAPH_\\nŠavijos-Uardigo regionas yra Atlanto vandenynu pakrantės lygumoje\",\n", + " \"lv\": \"\\n_START_ARTICLE_\\nApatīts\\n_START_SECTION_\\nĪpašības\\n_START_PARAGRAPH_\\nApatīta kopējā ķīmiskā formula ir Ca₁₀(PO₄)₆(OH,F,Cl)₂, ir trīs atšķirīgi apatīta veidi: apatīts: Ca₁₀(PO₄)₆(OH)₂, fluorapatīts Ca₁₀(PO₄)₆(F)₂ un hlorapatīts: Ca₁₀(PO₄)₆(Cl)₂. Pēc sastāva\",\n", + " \"ms\": \"\\n_START_ARTICLE_\\nEdward C. Prescott\\n_START_PARAGRAPH_\\nEdward Christian Prescott (lahir 26 Disember 1940) ialah seorang ahli ekonomi Amerika. Beliau menerima Hadiah Peringatan Nobel dalam Sains Ekonomi pada tahun 2004, berkongsi\",\n", + " \"no\": \"\\n_START_ARTICLE_\\nAl-Minya\\n_START_SECTION_\\nEtymologi\\n_START_PARAGRAPH_\\nDet er sprikende forklaringer på bynavnet. Det kan komme fra gammelegyptisk Men'at Khufu, i betydning byen hvor Khufu ble ammet, noe som knytter byen til farao Khufu (Keops), som\",\n", + " \"ro\": \"\\n_START_ARTICLE_\\nDealurile Cernăuțiului\\n_START_PARAGRAPH_\\nDealurile Cernăuțiului sunt un lanț deluros striat, care se întinde în partea centrală a interfluviului dintre Prut și Siret, în cadrul regiunii Cernăuți din\",\n", + " \"sk\": \"\\n_START_ARTICLE_\\n10. peruť RAAF\\n_START_PARAGRAPH_\\n10. peruť RAAF je námorná hliadkovacia peruť kráľovských austrálskych vzdušných síl (Royal Australian Air Force – RAAF) založená na základni Edinburgh v Južnej Austrálii ako súčasť 92\",\n", + " \"sl\": \"\\n_START_ARTICLE_\\n105 Artemida\\n_START_SECTION_\\nOdkritje\\n_START_PARAGRAPH_\\nAsteroid je 16. septembra 1868 odkril James Craig Watson (1838 – 1880). Poimenovan je po Artemidi, boginji Lune iz grške\",\n", + " \"sr\": \"\\n_START_ARTICLE_\\nЉанос Морелос 1. Сексион (Истапангахоја)\\n_START_SECTION_\\nСтановништво\\n_START_PARAGRAPH_\\nПрема подацима из 2010. године у насељу је живело 212\",\n", + " \"sv\": \"\\n_START_ARTICLE_\\nÖstra Torps landskommun\\n_START_SECTION_\\nAdministrativ historik\\n_START_PARAGRAPH_\\nKommunen bildades i Östra Torps socken i Vemmenhögs härad i Skåne när 1862 års kommunalförordningar trädde i kraft. 
_NEWLINE_Vid kommunreformen\",\n", + " \"tl\": \"\\n_START_ARTICLE_\\nBésame Mucho\\n_START_PARAGRAPH_\\nAng Bésame Mucho ay isang awit na nasa Kastila. Isinulat ito ng Mehikanang si Consuelo Velázquez noong 1940, bago sumapit ang kanyang ika-16 na\",\n", + " \"uk\": \"\\n_START_ARTICLE_\\nІслам та інші релігії\\n_START_PARAGRAPH_\\nПротягом багатовікової ісламської історії мусульманські правителі, ісламські вчені і звичайні мусульмани вступали у різні відносини з представниками інших релігій. Стиль цих\",\n", + " \"vi\": \"\\n_START_ARTICLE_\\nĐường tỉnh 316\\n_START_PARAGRAPH_\\nĐường tỉnh 316 hay tỉnh lộ 316, viết tắt ĐT316 hay TL316, là đường tỉnh ở các huyện Thanh Sơn, Thanh Thủy, Tam Nông tỉnh Phú Thọ ._NEWLINE_ĐT316 bắt đầu từ xã Tinh Nhuệ\",\n", + " \"multilingual-64k\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\",\n", + " \"multilingual-128k\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\"}\n", + "\n", + "seed = lang_to_seed[language]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "mZDGsSyUM_Mg" + }, + "outputs": [], + "source": [ + "#@title Enter your own seed (Optional).\n", + "user_seed = \"\" #@param { type: \"string\" }\n", + "if user_seed.strip():\n", + " seed = user_seed.strip()\n", + "\n", + "# The seed must start with \"_START_ARTICLE_\" or the generated text will be gibberish\n", + "START_ARTICLE = \"_START_ARTICLE_\"\n", + "if START_ARTICLE not in seed:\n", + " seed = \"\\n{}\\n{}\".format(START_ARTICLE, seed)\n", + "\n", + "print(\"Generating text from seed:\\n{}\".format(seed))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "5dMuShi3XuLd" + }, + "outputs": [], + "source": [ + "#@title Initialize session.\n", + "with tf.Session(graph=g).as_default() as session:\n", + " session.run(init_op)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "aS53xjmbbw0Z" + }, + "outputs": [], + "source": [ + "#@title Generate text\n", + "\n", + "with session.as_default():\n", + " results = session.run([embeddings, neg_log_likelihood, ppl, activations, token_ids, generated_text], feed_dict={text: [seed]})\n", + " embeddings_result, neg_log_likelihood_result, ppl_result, activations_result, token_ids_result, generated_text_result = results\n", + " generated_text_output = generated_text_result[0].decode('utf-8')\n", + "\n", + "print(generated_text_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tjQf3N1wdND0" + }, + "source": [ + "We can also look at the other outputs of the model - the perplexity, the token ids, the intermediate activations, and the embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pGfw3CQWNC_n" + }, + "outputs": [], + "source": [ + "ppl_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FLlgJObFNEmj" + }, + "outputs": [], + "source": [ + "token_ids_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5SaH36M-NGXc" + }, + "outputs": [], + 
"source": [ + "activations_result.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k9Eb_DPfQdUu" + }, + "outputs": [], + "source": [ + "embeddings_result" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "wiki40b_lm.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/yamnet.ipynb b/site/en/hub/tutorials/yamnet.ipynb new file mode 100644 index 00000000000..e6c9fbca5a1 --- /dev/null +++ b/site/en/hub/tutorials/yamnet.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "laa9tRjJ59bl" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "T4ZHtBpK6Dom" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hk5u_9KN1m-t" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x2ep-q7k_5R-" + }, + "source": [ + "# Sound classification with YAMNet\n", + "\n", + "YAMNet is a deep net that predicts 521 audio event [classes](https://github.com/tensorflow/models/blob/master/research/audioset/yamnet/yamnet_class_map.csv) from the [AudioSet-YouTube corpus](http://g.co/audioset) it was trained on. It employs the\n", + "[Mobilenet_v1](https://arxiv.org/pdf/1704.04861.pdf) depthwise-separable\n", + "convolution architecture." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bteu7pfkpt_f" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import numpy as np\n", + "import csv\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import Audio\n", + "from scipy.io import wavfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YSVs3zRrrYmY" + }, + "source": [ + "Load the Model from TensorFlow Hub.\n", + "\n", + "Note: to read the documentation just follow the model's [url](https://tfhub.dev/google/yamnet/1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VX8Vzs6EpwMo" + }, + "outputs": [], + "source": [ + "# Load the model.\n", + "model = hub.load('https://tfhub.dev/google/yamnet/1')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lxWx6tOdtdBP" + }, + "source": [ + "The labels file will be loaded from the models assets and is present at `model.class_map_path()`.\n", + "You will load it on the `class_names` variable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHSToAW--o4U" + }, + "outputs": [], + "source": [ + "# Find the name of the class with the top score when mean-aggregated across frames.\n", + "def class_names_from_csv(class_map_csv_text):\n", + " \"\"\"Returns list of class names corresponding to score vector.\"\"\"\n", + " class_names = []\n", + " with tf.io.gfile.GFile(class_map_csv_text) as csvfile:\n", + " reader = csv.DictReader(csvfile)\n", + " for row in reader:\n", + " class_names.append(row['display_name'])\n", + "\n", + " return class_names\n", + "\n", + "class_map_path = model.class_map_path().numpy()\n", + "class_names = class_names_from_csv(class_map_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mSFjRwkZ59lU" + }, + "source": [ + "Add a method to verify and convert a loaded audio is on the proper sample_rate (16K), otherwise it would affect the model's results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LizGwWjc5w6A" + }, + "outputs": [], + "source": [ + "def ensure_sample_rate(original_sample_rate, waveform,\n", + " desired_sample_rate=16000):\n", + " \"\"\"Resample waveform if required.\"\"\"\n", + " if original_sample_rate != desired_sample_rate:\n", + " desired_length = int(round(float(len(waveform)) /\n", + " original_sample_rate * desired_sample_rate))\n", + " waveform = scipy.signal.resample(waveform, desired_length)\n", + " return desired_sample_rate, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZEgCobA9bWl" + }, + "source": [ + "## Downloading and preparing the sound file\n", + "\n", + "Here you will download a wav file and listen to it.\n", + "If you have a file already available, just upload it to colab and use it instead.\n", + "\n", + "Note: The expected audio file should be a mono wav file at 16kHz sample rate." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WzZHvyTtsJrc" + }, + "outputs": [], + "source": [ + "!curl -O https://storage.googleapis.com/audioset/speech_whistling2.wav" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D8LKmqvGzZzr" + }, + "outputs": [], + "source": [ + "!curl -O https://storage.googleapis.com/audioset/miaow_16k.wav" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Wo9KJb-5zuz1" + }, + "outputs": [], + "source": [ + "# wav_file_name = 'speech_whistling2.wav'\n", + "wav_file_name = 'miaow_16k.wav'\n", + "sample_rate, wav_data = wavfile.read(wav_file_name, 'rb')\n", + "sample_rate, wav_data = ensure_sample_rate(sample_rate, wav_data)\n", + "\n", + "# Show some basic information about the audio.\n", + "duration = len(wav_data)/sample_rate\n", + "print(f'Sample rate: {sample_rate} Hz')\n", + "print(f'Total duration: {duration:.2f}s')\n", + "print(f'Size of the input: {len(wav_data)}')\n", + "\n", + "# Listening to the wav file.\n", + "Audio(wav_data, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P9I290COsMBm" + }, + "source": [ + "The `wav_data` needs to be normalized to values in `[-1.0, 1.0]` (as stated in the model's [documentation](https://tfhub.dev/google/yamnet/1))." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bKr78aCBsQo3" + }, + "outputs": [], + "source": [ + "waveform = wav_data / tf.int16.max" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e_Xwd4GPuMsB" + }, + "source": [ + "## Executing the Model\n", + "\n", + "Now the easy part: using the data already prepared, you just call the model and get the: scores, embedding and the spectrogram.\n", + "\n", + "The score is the main result you will use.\n", + "The spectrogram you will use to do some visualizations later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BJGP6r-At_Jc" + }, + "outputs": [], + "source": [ + "# Run the model, check the output.\n", + "scores, embeddings, spectrogram = model(waveform)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vmo7griQprDk" + }, + "outputs": [], + "source": [ + "scores_np = scores.numpy()\n", + "spectrogram_np = spectrogram.numpy()\n", + "infered_class = class_names[scores_np.mean(axis=0).argmax()]\n", + "print(f'The main sound is: {infered_class}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uj2xLf-P_ndS" + }, + "source": [ + "## Visualization\n", + "\n", + "YAMNet also returns some additional information that we can use for visualization.\n", + "Let's take a look on the Waveform, spectrogram and the top classes inferred." 
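For reference, the steps above condense into a short standalone script. This is a minimal sketch, assuming the `miaow_16k.wav` file downloaded above is in the working directory and is mono; `scipy.signal` is imported explicitly here because the resampling step relies on `scipy.signal.resample`.

```python
# Condensed YAMNet inference sketch (assumes a mono miaow_16k.wav in the working directory).
import csv

import scipy.signal
import tensorflow as tf
import tensorflow_hub as hub
from scipy.io import wavfile

model = hub.load('https://tfhub.dev/google/yamnet/1')

# The class names ship with the model as a CSV asset.
with tf.io.gfile.GFile(model.class_map_path().numpy()) as f:
    class_names = [row['display_name'] for row in csv.DictReader(f)]

sample_rate, wav_data = wavfile.read('miaow_16k.wav')
if sample_rate != 16000:
    # Resample to the 16 kHz input the model expects.
    desired_length = int(round(float(len(wav_data)) / sample_rate * 16000))
    wav_data = scipy.signal.resample(wav_data, desired_length)

waveform = wav_data / tf.int16.max  # scale int16 samples to [-1.0, 1.0]
scores, embeddings, spectrogram = model(waveform)
print('Top class:', class_names[scores.numpy().mean(axis=0).argmax()])
```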
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_QSTkmv7wr2M" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "\n", + "# Plot the waveform.\n", + "plt.subplot(3, 1, 1)\n", + "plt.plot(waveform)\n", + "plt.xlim([0, len(waveform)])\n", + "\n", + "# Plot the log-mel spectrogram (returned by the model).\n", + "plt.subplot(3, 1, 2)\n", + "plt.imshow(spectrogram_np.T, aspect='auto', interpolation='nearest', origin='lower')\n", + "\n", + "# Plot and label the model output scores for the top-scoring classes.\n", + "mean_scores = np.mean(scores, axis=0)\n", + "top_n = 10\n", + "top_class_indices = np.argsort(mean_scores)[::-1][:top_n]\n", + "plt.subplot(3, 1, 3)\n", + "plt.imshow(scores_np[:, top_class_indices].T, aspect='auto', interpolation='nearest', cmap='gray_r')\n", + "\n", + "# patch_padding = (PATCH_WINDOW_SECONDS / 2) / PATCH_HOP_SECONDS\n", + "# values from the model documentation\n", + "patch_padding = (0.025 / 2) / 0.01\n", + "plt.xlim([-patch_padding-0.5, scores.shape[0] + patch_padding-0.5])\n", + "# Label the top_N classes.\n", + "yticks = range(0, top_n, 1)\n", + "plt.yticks(yticks, [class_names[top_class_indices[x]] for x in yticks])\n", + "_ = plt.ylim(-0.5 + np.array([top_n, 0]))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "yamnet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/install/_index.yaml b/site/en/install/_index.yaml index 0946e24ef41..71bc660f81d 100644 --- a/site/en/install/_index.yaml +++ b/site/en/install/_index.yaml @@ -20,13 +20,14 @@ landing_page:
      -
    • Python 3.6–3.9
    • +
    • Python 3.9–3.12
    • Ubuntu 16.04 or later
    • Windows 7 or later (with C++ redistributable)
    • macOS 10.12.6 (Sierra) or later (no GPU support)
    • +
• WSL2 via Windows 10 19044 or higher, including GPU support (Experimental)
    @@ -40,7 +41,6 @@ landing_page:

    Install TensorFlow with Python's pip package manager.

    Official packages available for Ubuntu, Windows, and macOS.

    -

    See the GPU guide for CUDA®-enabled cards.

    buttons: - label: Read the pip install guide @@ -51,8 +51,10 @@ landing_page:
             # Requires the latest pip
             pip install --upgrade pip
    - # Current stable release for CPU and GPU + # Current stable release for CPU pip install tensorflow
    + # Current stable release for GPU (Linux / WSL2) + pip install tensorflow[and-cuda]
    # Or try the preview build (unstable) pip install tf-nightly
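Once a package is installed, a quick smoke test is the same one-line check used in the pip guide; a minimal sketch (the printed tensor value differs per run, and an empty GPU list simply means no GPU was detected):

```python
# Post-install smoke test: build a tiny graph and list the visible GPUs.
import tensorflow as tf

print(tf.reduce_sum(tf.random.normal([1000, 1000])))
print(tf.config.list_physical_devices('GPU'))
```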
    @@ -66,8 +68,7 @@ landing_page: The TensorFlow Docker images are already configured to run TensorFlow. A Docker container runs in a - virtual environment and is the easiest way to set up GPU - support. + virtual environment and is the easiest way to set up GPU support.

             docker pull tensorflow/tensorflow:latest  # Download latest stable image
    diff --git a/site/en/install/_toc.yaml b/site/en/install/_toc.yaml index c8f60bde852..26cdb270bb8 100644 --- a/site/en/install/_toc.yaml +++ b/site/en/install/_toc.yaml @@ -7,8 +7,6 @@ toc: - title: Docker path: /install/docker - heading: Additional setup -- title: GPU support - path: /install/gpu - title: GPU device plugins path: /install/gpu_plugins - title: Problems diff --git a/site/en/install/docker.md b/site/en/install/docker.md index 30942924688..376ca0820a7 100644 --- a/site/en/install/docker.md +++ b/site/en/install/docker.md @@ -1,45 +1,43 @@ # Docker -[Docker](https://docs.docker.com/install/){:.external} uses *containers* to +[Docker](https://docs.docker.com/install/) uses *containers* to create virtual environments that isolate a TensorFlow installation from the rest of the system. TensorFlow programs are run *within* this virtual environment that can share resources with its host machine (access directories, use the GPU, connect to the Internet, etc.). The -[TensorFlow Docker images](https://hub.docker.com/r/tensorflow/tensorflow/){:.external} +[TensorFlow Docker images](https://hub.docker.com/r/tensorflow/tensorflow/) are tested for each release. -Docker is the easiest way to enable TensorFlow [GPU support](./gpu.md) on Linux since only the -[NVIDIA® GPU driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external} +Docker is the easiest way to enable TensorFlow [GPU support](./pip.md) on Linux since only the +[NVIDIA® GPU driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver) is required on the *host* machine (the *NVIDIA® CUDA® Toolkit* does not need to be installed). ## TensorFlow Docker requirements -1. [Install Docker](https://docs.docker.com/install/){:.external} on +1. [Install Docker](https://docs.docker.com/install/) on your local *host* machine. -2. For GPU support on Linux, [install NVIDIA Docker support](https://github.com/NVIDIA/nvidia-docker){:.external}. +2. For GPU support on Linux, [install NVIDIA Docker support](https://github.com/NVIDIA/nvidia-container-toolkit). * Take note of your Docker version with `docker -v`. Versions __earlier than__ 19.03 require nvidia-docker2 and the `--runtime=nvidia` flag. On versions __including and after__ 19.03, you will use the `nvidia-container-toolkit` package and the `--gpus all` flag. Both options are documented on the page linked above. Note: To run the `docker` command without `sudo`, create the `docker` group and add your user. For details, see the -[post-installation steps for Linux](https://docs.docker.com/install/linux/linux-postinstall/){:.external}. +[post-installation steps for Linux](https://docs.docker.com/install/linux/linux-postinstall/). ## Download a TensorFlow Docker image The official TensorFlow Docker images are located in the -[tensorflow/tensorflow](https://hub.docker.com/r/tensorflow/tensorflow/){:.external} -Docker Hub repository. Image releases [are tagged](https://hub.docker.com/r/tensorflow/tensorflow/tags/){:.external} +[tensorflow/tensorflow](https://hub.docker.com/r/tensorflow/tensorflow/) +Docker Hub repository. 
Image releases [are tagged](https://hub.docker.com/r/tensorflow/tensorflow/tags/) using the following format: | Tag | Description | |-------------|----------------------------------------------------------------------------------------------------------------------| | `latest` | The latest release of TensorFlow CPU binary image. Default. | | `nightly` | Nightly builds of the TensorFlow image. (Unstable.) | -| *`version`* | Specify the *version* of the TensorFlow binary image, for example\: *2.1.0* | -| `devel` | Nightly builds of a TensorFlow `master` development environment. Includes TensorFlow source code. | -| `custom-op` | Special experimental image for developing TF custom ops. More info [here](https://github.com/tensorflow/custom-op). | +| *`version`* | Specify the *version* of the TensorFlow binary image, for example\: *2.8.3* | Each base *tag* has variants that add or change functionality: @@ -66,7 +64,7 @@ To start a TensorFlow-configured container, use the following command form: docker run [-it] [--rm] [-p hostPort:containerPort] tensorflow/tensorflow[:tag] [command]
    -For details, see the [docker run reference](https://docs.docker.com/engine/reference/run/){:.external}. +For details, see the [docker run reference](https://docs.docker.com/engine/reference/run/). ### Examples using CPU-only images @@ -100,7 +98,7 @@ docker run -it --rm -v $PWD:/tmp -w /tmp tensorflow/tensorflow python ./script.p Permission issues can arise when files created within a container are exposed to the host. It's usually best to edit files on the host system. -Start a [Jupyter Notebook](https://jupyter.org/){:.external} server using +Start a [Jupyter Notebook](https://jupyter.org/) server using TensorFlow's nightly build:
    @@ -114,13 +112,13 @@ Follow the instructions and open the URL in your host web browser:
     ## GPU support
     
     Docker is the easiest way to run TensorFlow on a GPU since the *host* machine
    -only requires the [NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external}
    +only requires the [NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver)
     (the *NVIDIA® CUDA® Toolkit* is not required).
     
    -Install the [Nvidia Container Toolkit](https://github.com/NVIDIA/nvidia-docker/blob/master/README.md#quickstart){:.external} 
    +Install the [Nvidia Container Toolkit](https://github.com/NVIDIA/nvidia-docker/blob/master/README.md#quickstart) 
     to add NVIDIA® GPU support to Docker. `nvidia-container-runtime` is only
     available for Linux. See the `nvidia-container-runtime` 
    -[platform support FAQ](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#platform-support){:.external}
    +[platform support FAQ](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#platform-support)
     for details.
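After completing the checks below, you can also confirm from inside a GPU-enabled TensorFlow container that the device is visible to TensorFlow itself. A minimal sketch, assuming the `tensorflow/tensorflow:latest-gpu` image started with `--gpus all`:

```python
# Run inside a container started with:
#   docker run --gpus all -it --rm tensorflow/tensorflow:latest-gpu python
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
print('GPUs visible to TensorFlow:', gpus)  # an empty list means the GPU is not exposed to the container
```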
     
     Check if a GPU is available:
    @@ -132,7 +130,7 @@ lspci | grep -i nvidia
     Verify your `nvidia-docker` installation:
     
     
    -docker run --gpus all --rm nvidia/cuda nvidia-smi
    +docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
     
    Note: `nvidia-docker` v2 uses `--runtime=nvidia` instead of `--gpus all`. `nvidia-docker` v1 uses the `nvidia-docker` alias, diff --git a/site/en/install/errors.md b/site/en/install/errors.md index 0d52c00f898..938ba8b454f 100644 --- a/site/en/install/errors.md +++ b/site/en/install/errors.md @@ -1,8 +1,9 @@ # Build and install error messages -TensorFlow uses [GitHub issues](https://github.com/tensorflow/tensorflow/issues){:.external} -and [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow){:.external} -to track and document build and installation problems. +TensorFlow uses [GitHub issues](https://github.com/tensorflow/tensorflow/issues), +[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) and +[TensorFlow Forum](https://discuss.tensorflow.org/c/general-discussion/6) +to track, document, and discuss build and installation problems. The following list links error messages to a solution or discussion. If you find an installation or build problem that is not listed, please search the GitHub @@ -13,10 +14,10 @@ question on Stack Overflow with the `tensorflow` tag.
    GitHub issue or Stack Overflow Error Message
    38896424 31058"No matching distribution found for tensorflow": + "No matching distribution found for tensorflow": Pip can't find a TensorFlow package compatible with your system. Check the system requirements and - python version + Python version
    36371137 and - here36371137
    libprotobuf ERROR google/protobuf/src/google/protobuf/io/coded_stream.cc:207] A
       protocol message was rejected because it was too big (more than 67108864 bytes).
       To increase the limit (or to disable these warnings), see
    -  CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.
    35252888
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    42006320
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    35190574
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    35190574
    \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kFmEkitOFJSw" + }, + "source": [ + "TensorFlow provides a C API that can be used to build\n", + "[bindings for other languages](https://github.com/tensorflow/docs/tree/master/site/en/r1/guide/extend/bindings.md).\n", + "The API is defined in\n", + "c_api.h\n", + "and designed for simplicity and uniformity rather than convenience.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Vk--31hqIwSV" + }, + "source": [ + "## Nightly libtensorflow C packages\n", + "\n", + "libtensorflow packages are built nightly and uploaded to GCS for all supported\n", + "platforms. They are uploaded to the\n", + "[libtensorflow-nightly GCS bucket](https://storage.googleapis.com/libtensorflow-nightly)\n", + "and are indexed by operating system and date built. For MacOS and Linux shared\n", + "objects, there is a\n", + "[script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh)\n", + "that renames the `.so` files versioned to the current date copied into the\n", + "directory with the artifacts." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qowtdsijFMYZ" + }, + "source": [ + "## Supported Platforms\n", + "\n", + "TensorFlow for C is supported on the following systems:\n", + "\n", + "* Linux, 64-bit, x86\n", + "* macOS, Version 10.12.6 (Sierra) or higher\n", + "* Windows, 64-bit x86" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hnhAk8y-FSBN" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y50y01XUFVb2" + }, + "source": [ + "### Download and extract\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    TensorFlow C libraryURL
    Linux\n", + " \n", + "
    Linux CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-cpu-linux-x86_64.tar.gz
    Linux GPU supporthttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-gpu-linux-x86_64.tar.gz
    macOS\n", + " \n", + "
    macOS CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.16.2/libtensorflow-cpu-darwin-x86_64.tar.gz
    macOS ARM64 CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-cpu-darwin-arm64.tar.gz
    Windows\n", + " \n", + "
    Windows CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.18.1/libtensorflow-cpu-windows-x86_64.zip
    Windows GPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.10.0.zip

    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b4kWu6k0FaT9" + }, + "source": [ + "Extract the downloaded archive, which contains the header files to include in\n", + "your C program and the shared libraries to link against.\n", + "\n", + "On Linux and macOS, you may want to extract to `/usr/local/lib`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DrjVyjVJFcon" + }, + "outputs": [], + "source": [ + "%%bash\n", + "FILENAME=libtensorflow-cpu-linux-x86_64.tar.gz\n", + "wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/versions/2.18.1/${FILENAME}\n", + "sudo tar -C /usr/local -xzf ${FILENAME}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fcBJDdojJDyk" + }, + "source": [ + "### Linker\n", + "\n", + "On Linux/macOS, if you extract the TensorFlow C library to a system directory,\n", + "such as `/usr/local`, configure the linker with `ldconfig`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h0STAG82JDZs" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo ldconfig /usr/local/lib" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ix4HdnNGH6aF" + }, + "source": [ + "If you extract the TensorFlow C library to a non-system directory, such as\n", + "`~/mydir`, then configure the linker environmental variables:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E6E99eJzIJQs" + }, + "source": [ + "
    \n", + "
    \n", + "

    Linux

    \n", + "
    \n",
    +        "export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib\n",
    +        "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/mydir/lib\n",
    +        "
    \n", + "
    \n", + "
    \n", + "

    macOS

    \n", + "
    \n",
    +        "export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib\n",
    +        "export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:~/mydir/lib\n",
    +        "
    \n", + "
    \n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qYVWjxqaJVPs" + }, + "source": [ + "## Build" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UoMUuMJrJXp8" + }, + "source": [ + "### Example program\n", + "\n", + "With the TensorFlow C library installed, create an example program with the\n", + "following source code (`hello_tf.c`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b5851f1b" + }, + "outputs": [], + "source": [ + "%%writefile hello_tf.c\n", + "#include \n", + "#include \n", + "\n", + "int main() {\n", + " printf(\"Hello from TensorFlow C library version %s\\n\", TF_Version());\n", + " return 0;\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H1GFidbrIWzU" + }, + "source": [ + "### Compile\n", + "\n", + "Compile the example program to create an executable, then run:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Jph67SAjIX0M" + }, + "outputs": [], + "source": [ + "%%bash\n", + "gcc hello_tf.c -ltensorflow -o hello_tf\n", + "\n", + "./hello_tf" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0qtHXROoJwoz" + }, + "source": [ + "Success: The TensorFlow C library is configured.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YbqbjqOSJ0IL" + }, + "source": [ + "If the program doesn't build, make sure that `gcc` can access the TensorFlow C\n", + "library. If extracted to `/usr/local`, explicitly pass the library location to\n", + "the compiler:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CdPmM35VJ_77" + }, + "outputs": [], + "source": [ + "%%bash\n", + "gcc -I/usr/local/include -L/usr/local/lib hello_tf.c -ltensorflow -o hello_tf\n", + "\n", + "./hello_tf" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ea5fd208" + }, + "source": [ + "## Build from source\n", + "\n", + "TensorFlow is open source. Read\n", + "[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md)\n", + "to build TensorFlow's C library from source code." + ] + } + ], + "metadata": { + "colab": { + "name": "lang_c.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/install/lang_c.md b/site/en/install/lang_c.md deleted file mode 100644 index 4b3e3d8ebe4..00000000000 --- a/site/en/install/lang_c.md +++ /dev/null @@ -1,144 +0,0 @@ -# Install TensorFlow for C - -TensorFlow provides a C API that can be used to build -[bindings for other languages](https://github.com/tensorflow/docs/tree/master/site/en/r1/guide/extend/bindings.md). -The API is defined in -c_api.h -and designed for simplicity and uniformity rather than convenience. - -## Nightly Libtensorflow C packages - -Libtensorflow packages are built nightly and uploaded to GCS for all supported -platforms. They are uploaded to the -[libtensorflow-nightly GCS bucket](https://storage.googleapis.com/libtensorflow-nightly) -and are indexed by operating system and date built. For MacOS and Linux shared -objects, we have a -[script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh) -that renames the .so files versioned to the current date copied into the -directory with the artifacts. 
- -## Supported Platforms - -TensorFlow for C is supported on the following systems: - -* Linux, 64-bit, x86 -* macOS, Version 10.12.6 (Sierra) or higher -* Windows, 64-bit x86 - -## Setup - -### Download - - - - - - - - - - - - - - - - - - - - - - - - - - -
    TensorFlow C libraryURL
    Linux
    Linux CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.6.0.tar.gz
    Linux GPU supporthttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.6.0.tar.gz
    macOS
    macOS CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.6.0.tar.gz
    Windows
    Windows CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-windows-x86_64-2.6.0.zip
    Windows GPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.6.0.zip
    - -### Extract - -Extract the downloaded archive, which contains the header files to include in -your C program and the shared libraries to link against. - -On Linux and macOS, you may want to extract to `/usr/local/lib`: - -
    -sudo tar -C /usr/local -xzf (downloaded file)
    -
    - -### Linker - -On Linux/macOS, if you extract the TensorFlow C library to a system directory, -such as `/usr/local`, configure the linker with `ldconfig`: - -
    -sudo ldconfig
    -
    - -If you extract the TensorFlow C library to a non-system directory, such as -`~/mydir`, then configure the linker environmental variables: - -
    -
    -

    Linux

    -
    -export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib
    -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/mydir/lib
    -
    -
    -
    -

    macOS

    -
    -export LIBRARY_PATH=$LIBRARY_PATH:~/mydir/lib
    -export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:~/mydir/lib
    -
    -
    -
    - - -## Build - -### Example program - -With the TensorFlow C library installed, create an example program with the -following source code (`hello_tf.c`): - -```c -#include -#include - -int main() { - printf("Hello from TensorFlow C library version %s\n", TF_Version()); - return 0; -} -``` - -### Compile - -Compile the example program to create an executable, then run: - -
    -gcc hello_tf.c -ltensorflow -o hello_tf
    -
    -./hello_tf
    -
    - -The command outputs: Hello from TensorFlow C library version number - -Success: The TensorFlow C library is configured. - -If the program doesn't build, make sure that `gcc` can access the TensorFlow C -library. If extracted to `/usr/local`, explicitly pass the library location to -the compiler: - -
    -gcc -I/usr/local/include -L/usr/local/lib hello_tf.c -ltensorflow -o hello_tf
    -
    - - -## Build from source - -TensorFlow is open source. Read -[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md){:.external} -to build TensorFlow's C library from source code. diff --git a/site/en/install/lang_java_legacy.md b/site/en/install/lang_java_legacy.md index af177dc0950..37341c36659 100644 --- a/site/en/install/lang_java_legacy.md +++ b/site/en/install/lang_java_legacy.md @@ -1,7 +1,7 @@ # Install TensorFlow for Java Warning: TensorFlow for Java is deprecated and will be removed in a future -version of TensorFlow once the replacement is stable. +version of TensorFlow once [the replacement](https://www.tensorflow.org/jvm) is stable. TensorFlow provides a [Java API](https://www.tensorflow.org/api_docs/java/reference/org/tensorflow/package-summary)— @@ -27,7 +27,7 @@ To use TensorFlow on Android see [TensorFlow Lite](https://tensorflow.org/lite) ## TensorFlow with Apache Maven -To use TensorFlow with [Apache Maven](https://maven.apache.org){:.external}, +To use TensorFlow with [Apache Maven](https://maven.apache.org), add the dependency to the project's `pom.xml` file: ```xml @@ -40,7 +40,7 @@ add the dependency to the project's `pom.xml` file: ### GPU support -If your system has [GPU support](./gpu.md), add the following TensorFlow +If your system has [GPU support](./pip.md), add the following TensorFlow dependencies to the project's `pom.xml` file: ```xml @@ -167,11 +167,11 @@ system and processor support: Note: On Windows, the native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime. See the [Windows build from source](./source_windows.md) guide to install the -[Visual C++ 2019 Redistributable](https://visualstudio.microsoft.com/vs/){:.external}. +[Visual C++ 2019 Redistributable](https://visualstudio.microsoft.com/vs/). ### Compile -Using the `HelloTensorFlow.java` file from the [previous example](#example), +Using the `HelloTensorFlow.java` file from the [previous example](#example-program), compile a program that uses TensorFlow. Make sure the `libtensorflow.jar` is accessible to your `classpath`: @@ -203,5 +203,5 @@ Success: TensorFlow for Java is configured. ## Build from source TensorFlow is open source. Read -[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/java/README.md){:.external} +[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/java/README.md) to build TensorFlow's Java and native libraries from source code. diff --git a/site/en/install/pip.html b/site/en/install/pip.html deleted file mode 100644 index 3bd415aad9c..00000000000 --- a/site/en/install/pip.html +++ /dev/null @@ -1,350 +0,0 @@ - - - Install TensorFlow with pip - - - - - - -

    TensorFlow 2 packages are available

    -
      -
    • tensorflow —Latest stable release with CPU and GPU support (Ubuntu and Windows)
    • -
    • tf-nightly —Preview build (unstable). Ubuntu and Windows include GPU support.
    • -
    - - -

    Older versions of TensorFlow

    - -

    For TensorFlow 1.x, CPU and GPU packages are separate:

    - -
      -
    • tensorflow==1.15 —Release for CPU-only
    • -
    • tensorflow-gpu==1.15 —Release with GPU support (Ubuntu and Windows)
    • -
    - - -

    System requirements

    -
      -
    • Python 3.6–3.9 -
        -
      • Python 3.9 support requires TensorFlow 2.5 or later.
      • -
      • Python 3.8 support requires TensorFlow 2.2 or later.
      • -
      -
    • -
    • pip 19.0 or later (requires manylinux2010 support)
    • -
    • Ubuntu 16.04 or later (64-bit)
    • -
    • macOS 10.12.6 (Sierra) or later (64-bit) (no GPU support) -
        -
      • macOS requires pip 20.3 or later
      • -
      -
    • -
    • Windows 7 or later (64-bit) - -
    • -
    • GPU support requires a CUDA®-enabled card (Ubuntu and Windows)
    • -
    - - - -

    Hardware requirements

    -
      -
    • Starting with TensorFlow 1.6, binaries use AVX instructions which may not run on older CPUs.
    • -
    • Read the GPU support guide to set up a CUDA®-enabled GPU card on Ubuntu or Windows.
    • -
    - - -

    1. Install the Python development environment on your system

    - -

    - Check if your Python environment is already configured: -

    - - - -
    -python3 --version
    -pip3 --version
    -
    - -

    - If these packages are already installed, skip to the next step.
    - Otherwise, install Python, the - pip package manager, - and venv: -

    - -
    -
    -

    Ubuntu

    -
    -sudo apt update
    -sudo apt install python3-dev python3-pip python3-venv
    -
    -
    - -
    -

    macOS

    -

    Install using the Homebrew package manager:

    -
    -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
    -export PATH="/usr/local/opt/python/libexec/bin:$PATH"
    -# if you are on macOS 10.12 (Sierra) use `export PATH="/usr/local/bin:/usr/local/sbin:$PATH"`
    -brew update
    -brew install python  # Python 3
    -
    -
    - -
    -

    Windows

    -

    - Install the Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017, - and 2019. Starting with the TensorFlow 2.1.0 version, the msvcp140_1.dll - file is required from this package (which may not be provided from older redistributable packages). - The redistributable comes with Visual Studio 2019 but can be installed separately: -

    -
      -
    1. Go to the Microsoft Visual C++ downloads,
    2. -
    3. Scroll down the page to the Visual Studio 2015, 2017 and 2019 section.
    4. -
    5. Download and install the Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019 for your platform.
    6. -
    -

    Make sure long paths are enabled on Windows.

    -

    Install the 64-bit Python 3 release for Windows (select pip as an optional feature).

    -
    - -
    -

    Other

    -
    -curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
    -python get-pip.py
    -
    -
    -
    - - - - -

    2. Create a virtual environment (recommended)

    - -

    - Python virtual environments are used to isolate package installation from the system. -

    - -
    -
    -

    Ubuntu / macOS

    -

    - Create a new virtual environment by choosing a Python interpreter and making a - ./venv directory to hold it: -

    -
    python3 -m venv --system-site-packages ./venv
    -

    - Activate the virtual environment using a shell-specific command: -

    -
    source ./venv/bin/activate  # sh, bash, or zsh
    -
    . ./venv/bin/activate.fish  # fish
    -
    source ./venv/bin/activate.csh  # csh or tcsh
    - -

    - When the virtual environment is active, your shell prompt is prefixed with (venv). -

    -

    - Install packages within a virtual environment without affecting the host system - setup. Start by upgrading pip: -

    -
    -pip install --upgrade pip
    -
    -pip list  # show packages installed within the virtual environment
    -
    -

    - And to exit the virtual environment later: -

    -
    deactivate  # don't exit until you're done using TensorFlow
    -
    - - -
    -

    Windows

    -

    - Create a new virtual environment by choosing a Python interpreter and making a - .\venv directory to hold it: -

    -
    python -m venv --system-site-packages .\venv
    -

    - Activate the virtual environment: -

    -
    .\venv\Scripts\activate
    -

    - Install packages within a virtual environment without affecting the host system - setup. Start by upgrading pip: -

    -
    -pip install --upgrade pip
    -
    -pip list  # show packages installed within the virtual environment
    -
    -

    - And to exit the virtual environment later: -

    -
    deactivate  # don't exit until you're done using TensorFlow
    -
    - - -
    -

    Conda

    -

    -While the TensorFlow provided pip package is recommended, a -community-supported Anaconda package -is available. To install, read the Anaconda TensorFlow guide. -

    -
    -
    - - -

    3. Install the TensorFlow pip package

    - -

    - Choose one of the following TensorFlow packages to install from PyPI: -

    - -
      -
    • tensorflow —Latest stable release with CPU and GPU support (Ubuntu and Windows).
    • -
    • tf-nightly —Preview build (unstable). Ubuntu and Windows include GPU support.
    • -
    • tensorflow==1.15 —The final version of TensorFlow 1.x.
    • -
    - - - -
    -
    -

    Virtual environment install

    -
    pip install --upgrade tensorflow
    -

    Verify the install:

    -
    python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
    -
    - -
    -

    System install

    -
    pip3 install --user --upgrade tensorflow  # install in $HOME
    -

    Verify the install:

    -
    python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
    -
    -
    - - - -

    Package location

    - -

    - A few installation mechanisms require the URL of the TensorFlow Python package. - The value you specify depends on your Python version. -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    VersionURL
    Linux
    Python 3.6 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp36-cp36m-manylinux2010_x86_64.whl
    Python 3.6 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp36-cp36m-manylinux2010_x86_64.whl
    Python 3.7 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp37-cp37m-manylinux2010_x86_64.whl
    Python 3.7 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp37-cp37m-manylinux2010_x86_64.whl
    Python 3.8 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp38-cp38-manylinux2010_x86_64.whl
    Python 3.8 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp38-cp38-manylinux2010_x86_64.whl
    Python 3.9 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.6.0-cp39-cp39-manylinux2010_x86_64.whl
    Python 3.9 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.6.0-cp39-cp39-manylinux2010_x86_64.whl
    macOS (CPU-only)
    Python 3.6https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp36-cp36m-macosx_10_11_x86_64.whl
    Python 3.7https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp37-cp37m-macosx_10_11_x86_64.whl
    Python 3.8https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp38-cp38-macosx_10_11_x86_64.whl
    Python 3.9https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.6.0-cp39-cp39-macosx_10_11_x86_64.whl
    Windows
    Python 3.6 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp36-cp36m-win_amd64.whl
    Python 3.6 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp36-cp36m-win_amd64.whl
    Python 3.7 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp37-cp37m-win_amd64.whl
    Python 3.7 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp37-cp37m-win_amd64.whl
    Python 3.8 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp38-cp38-win_amd64.whl
    Python 3.8 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp38-cp38-win_amd64.whl
    Python 3.9 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.6.0-cp39-cp39-win_amd64.whl
    Python 3.9 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.6.0-cp39-cp39-win_amd64.whl
    - - - diff --git a/site/en/install/pip.md b/site/en/install/pip.md new file mode 100644 index 00000000000..a9e4bf4bf74 --- /dev/null +++ b/site/en/install/pip.md @@ -0,0 +1,658 @@ + +# Install TensorFlow with pip + + +This guide is for the latest stable version of TensorFlow. For the +preview build *(nightly)*, use the pip package named +`tf-nightly`. Refer to [these tables](./source#tested_build_configurations) for +older TensorFlow version requirements. For the CPU-only build, use the pip +package named `tensorflow-cpu`. + +Here are the quick versions of the install commands. Scroll down for the +step-by-step instructions. + +* {Linux} + + Note: Starting with TensorFlow `2.10`, Linux CPU-builds for Aarch64/ARM64 + processors are built, maintained, tested and released by a third party: + [AWS](https://aws.amazon.com/). + Installing the [`tensorflow`](https://pypi.org/project/tensorflow/) + package on an ARM machine installs AWS's + [`tensorflow-cpu-aws`](https://pypi.org/project/tensorflow-cpu-aws/) package. + They are provided as-is. Tensorflow will use reasonable efforts to maintain + the availability and integrity of this pip package. There may be delays if + the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this collaboration. + + ```bash + python3 -m pip install 'tensorflow[and-cuda]' + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {MacOS} + + ```bash + # There is currently no official GPU support for MacOS. + python3 -m pip install tensorflow + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +* {Windows Native} + + Caution: TensorFlow `2.10` was the **last** TensorFlow release that + supported GPU on native-Windows. + Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-wsl2), + or install `tensorflow` or `tensorflow-cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) + + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + # Anything above 2.10 is not supported on the GPU on Windows Native + python -m pip install "tensorflow<2.11" + # Verify the installation: + python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {Windows WSL2} + + Note: TensorFlow with GPU access is supported for WSL2 on Windows 10 19044 or + higher. This corresponds to Windows 10 version 21H2, the November 2021 + update. You can get the latest update from here: + [Download Windows 10](https://www.microsoft.com/software-download/windows10). + For instructions, see + [Install WSL2](https://docs.microsoft.com/windows/wsl/install) + and + [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) + for CUDA in WSL. + + ```bash + python3 -m pip install tensorflow[and-cuda] + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {CPU} + + Note: Starting with TensorFlow `2.10`, Windows CPU-builds for x86/x64 + processors are built, maintained, tested and released by a third party: + [Intel](https://www.intel.com/). 
+ Installing the Windows-native [`tensorflow`](https://pypi.org/project/tensorflow/) + or [`tensorflow-cpu`](https://pypi.org/project/tensorflow-cpu/) + package installs Intel's + [`tensorflow-intel`](https://pypi.org/project/tensorflow-intel/) + package. These packages are provided as-is. Tensorflow will use reasonable + efforts to maintain the availability and integrity of this pip package. + There may be delays if the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this + collaboration. + + ```bash + python3 -m pip install tensorflow + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +* {Nightly} + + ```bash + python3 -m pip install tf-nightly + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +## Hardware requirements + +Note: TensorFlow binaries use +[AVX instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX) +which may not run on older CPUs. + +The following GPU-enabled devices are supported: + +* NVIDIA® GPU card with CUDA® architectures 3.5, 5.0, 6.0, 7.0, 7.5, 8.0 and + higher. See the list of + [CUDA®-enabled GPU cards](https://developer.nvidia.com/cuda-gpus). +* For GPUs with unsupported CUDA® architectures, or to avoid JIT compilation + from PTX, or to use different versions of the NVIDIA® libraries, see the + [Linux build from source](./source.md) guide. +* Packages do not contain PTX code except for the latest supported CUDA® + architecture; therefore, TensorFlow fails to load on older GPUs when + `CUDA_FORCE_PTX_JIT=1` is set. (See + [Application Compatibility](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#application-compatibility) + for details.) + +Note: The error message "Status: device kernel image is invalid" indicates that +the TensorFlow package does not contain PTX for your architecture. You can +enable compute capabilities by [building TensorFlow from source](./source.md). + +## System requirements + +* Ubuntu 16.04 or higher (64-bit) +* macOS 12.0 (Monterey) or higher (64-bit) *(no GPU support)* +* Windows Native - Windows 7 or higher (64-bit) *(no GPU support after TF 2.10)* +* Windows WSL2 - Windows 10 19044 or higher (64-bit) + +Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. + +## Software requirements + +* Python 3.9–3.12 +* pip version 19.0 or higher for Linux (requires `manylinux2014` support) and + Windows. pip version 20.3 or higher for macOS. +* Windows Native Requires + [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist) + + +The following NVIDIA® software are only required for GPU support. + +* [NVIDIA® GPU drivers](https://www.nvidia.com/drivers) + * >= 525.60.13 for Linux + * >= 528.33 for WSL on Windows +* [CUDA® Toolkit 12.3](https://developer.nvidia.com/cuda-toolkit-archive). +* [cuDNN SDK 8.9.7](https://developer.nvidia.com/cudnn). +* *(Optional)* + [TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/archives/index.html#trt_7) + to improve latency and throughput for inference. + +## Step-by-step instructions + +* {Linux} + + ### 1. System requirements + + * Ubuntu 16.04 or higher (64-bit) + + TensorFlow only officially supports Ubuntu. 
However, the following + instructions may also work for other Linux distros. + + Note: Starting with TensorFlow `2.10`, Linux CPU-builds for Aarch64/ARM64 + processors are built, maintained, tested and released by a third party: + [AWS](https://aws.amazon.com/). + Installing the [`tensorflow`](https://pypi.org/project/tensorflow/) + package on an ARM machine installs AWS's + [`tensorflow-cpu-aws`](https://pypi.org/project/tensorflow-cpu-aws/) package. + They are provided as-is. Tensorflow will use reasonable efforts to maintain + the availability and integrity of this pip package. There may be delays if + the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this collaboration. + + ### 2. GPU setup + + You can skip this section if you only run TensorFlow on the CPU. + + Install the + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. You can use the following command to verify it is + installed. + + ```bash + nvidia-smi + ``` + + ### 3. Create a virtual environment with [venv](https://docs.python.org/3/library/venv.html){:.external} + + The venv module is part of Python’s standard library and is the officially recommended way to create virtual environments. + + Navigate to your desired virtual environments directory and create a new venv environment named `tf` with the following command. + + ```bash + python3 -m venv tf + ``` + + You can activate it with the following command. + + ```bash + source tf/bin/activate + ``` + + Make sure that the virtual environment is activated for the rest of the installation. + + ### 4. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + # For GPU users + pip install tensorflow[and-cuda] + # For CPU users + pip install tensorflow + ``` + + **Note:** Do not install TensorFlow with `conda`. It may not have the latest stable version. `pip` is recommended since TensorFlow is only officially released to PyPI. + + ### 6. Verify the installation + + Verify the CPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. **If not continue to the next step**. + + ### 6. [GPU only] Virtual environment configuration + + If the GPU test in the last section was unsuccessful, the most likely cause is that components aren't being detected, + and/or conflict with the existing system CUDA installation. So you need to add some symbolic links to fix this. + + * Create symbolic links to NVIDIA shared libraries: + + ```bash + pushd $(dirname $(python -c 'print(__import__("tensorflow").__file__)')) + ln -svf ../nvidia/*/lib/*.so* . 
+ popd + ``` + + * Create a symbolic link to ptxas: + + ```bash + ln -sf $(find $(dirname $(dirname $(python -c "import nvidia.cuda_nvcc; + print(nvidia.cuda_nvcc.__file__)"))/*/bin/) -name ptxas -print -quit) $VIRTUAL_ENV/bin/ptxas + ``` + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + + + +* {MacOS} + + ### 1. System requirements + + * macOS 10.12.6 (Sierra) or higher (64-bit) + + Note: While TensorFlow supports Apple Silicon (M1), packages that include + custom C++ extensions for TensorFlow also need to be compiled for Apple M1. + Some packages, like + [tensorflow_decision_forests](https://www.tensorflow.org/decision_forests), + publish M1-compatible versions, but many packages don't. To use those + libraries, you will have to use TensorFlow with x86 emulation and Rosetta. + + Currently, there is no official GPU support for running TensorFlow on + macOS. The following instructions are for running on CPU. + + ### 2. Check Python version + + Check if your Python environment is already configured: + + Note: Requires Python 3.9–3.11, and pip >= 20.3 for macOS. + + ```bash + python3 --version + python3 -m pip --version + ``` + + ### 3. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + pip install tensorflow + ``` + + ### 4. Verify the installation + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + +* {Windows Native} + + Caution: TensorFlow `2.10` was the **last** TensorFlow release that + supported GPU on native-Windows. + Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-[wsl2]), + or install `tensorflow-cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-). + + ### 1. System requirements + + * Windows 7 or higher (64-bit) + + Note: Starting with TensorFlow `2.10`, Windows CPU-builds for x86/x64 + processors are built, maintained, tested and released by a third party: + [Intel](https://www.intel.com/). + Installing the Windows-native [`tensorflow`](https://pypi.org/project/tensorflow/) + or [`tensorflow-cpu`](https://pypi.org/project/tensorflow-cpu/) + package installs Intel's + [`tensorflow-intel`](https://pypi.org/project/tensorflow-intel/) + package. These packages are provided as-is. TensorFlow will use reasonable + efforts to maintain the availability and integrity of this pip package. + There may be delays if the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this + collaboration. + + ### 2. Install Microsoft Visual C++ Redistributable + + Install the *Microsoft Visual C++ Redistributable for Visual Studio 2015, + 2017, and 2019*. Starting with the TensorFlow 2.1.0 version, the + `msvcp140_1.dll` file is required from this package (which may not be + provided from older redistributable packages). The redistributable comes + with *Visual Studio 2019* but can be installed separately: + + 1. 
Go to the + [Microsoft Visual C++ downloads](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads). + 2. Scroll down the page to the *Visual Studio 2015, 2017 and 2019* section. + 3. Download and install the *Microsoft Visual C++ Redistributable for + Visual Studio 2015, 2017 and 2019* for your platform. + + Make sure + [long paths are enabled](https://superuser.com/questions/1119883/windows-10-enable-ntfs-long-paths-policy-option-missing) + on Windows. + + ### 3. Install Miniconda + + [Miniconda](https://docs.conda.io/en/latest/miniconda.html) + is the recommended approach for installing TensorFlow with GPU support. + It creates a separate environment to avoid changing any installed + software in your system. This is also the easiest way to install the + required software especially for the GPU setup. + + Download the + [Miniconda Windows Installer](https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe). + Double-click the downloaded file and follow the instructions on the screen. + + ### 4. Create a conda environment + + Create a new conda environment named `tf` with the following command. + + ```bash + conda create --name tf python=3.9 + ``` + + You can deactivate and activate it with the following commands. + + ```bash + conda deactivate + conda activate tf + ``` + + Make sure it is activated for the rest of the installation. + + ### 5. GPU setup + + You can skip this section if you only run TensorFlow on CPU. + + First install + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. + + Then install the CUDA, cuDNN with conda. + + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + ``` + + ### 6. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + Note: Do not install TensorFlow with conda. It may not have the latest stable + version. pip is recommended since TensorFlow is only officially released to + PyPI. + + ```bash + # Anything above 2.10 is not supported on the GPU on Windows Native + pip install "tensorflow<2.11" + ``` + + ### 7. Verify the installation + + Verify the CPU setup: + + ```bash + python -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. + +* {Windows WSL2} + + ### 1. System requirements + + * Windows 10 19044 or higher (64-bit). This corresponds to Windows 10 + version 21H2, the November 2021 update. + + See the following documents to: + + * [Download the latest Windows 10 update](https://www.microsoft.com/software-download/windows10). + * [Install WSL2](https://docs.microsoft.com/windows/wsl/install) + * [Setup NVIDIA® GPU support in WSL2](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) + + ### 2. GPU setup + + You can skip this section if you only run TensorFlow on the CPU. + + Install the + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. You can use the following command to verify it is + installed. + + ```bash + nvidia-smi + ``` + + ### 3. 
Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + # For GPU users + pip install tensorflow[and-cuda] + # For CPU users + pip install tensorflow + ``` + + ### 4. Verify the installation + + Verify the CPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. + + +## Package location + +A few installation mechanisms require the URL of the TensorFlow Python package. +The value you specify depends on your Python version. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Version | URL |
| ------- | --- |
| **Linux x86** | |
| Python 3.9 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.9 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.10 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.10 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.11 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.11 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.12 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.12 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.13 GPU support | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| Python 3.13 CPU-only | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| **Linux Arm64 (CPU-only)** | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| Python 3.13 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl |
| **macOS x86 (CPU-only)** — Caution: TensorFlow 2.16 was the last TensorFlow release that supported macOS x86 | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp39-cp39-macosx_10_15_x86_64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp310-cp310-macosx_10_15_x86_64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp311-cp311-macosx_10_15_x86_64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp312-cp312-macosx_10_15_x86_64.whl |
| **macOS Arm64 (CPU-only)** | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-macosx_12_0_arm64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-macosx_12_0_arm64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-macosx_12_0_arm64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-macosx_12_0_arm64.whl |
| Python 3.13 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-macosx_12_0_arm64.whl |
| **Windows (CPU-only)** | |
| Python 3.9 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp39-cp39-win_amd64.whl |
| Python 3.10 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp310-cp310-win_amd64.whl |
| Python 3.11 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-win_amd64.whl |
| Python 3.12 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp312-cp312-win_amd64.whl |
| Python 3.13 | https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp313-cp313-win_amd64.whl |
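For example, a minimal sketch of installing directly from one of the URLs above with pip (assuming a Linux x86 machine, Python 3.11, and a CPU-only setup; pick the wheel that matches your own platform and Python version):

```bash
# Install the CPU-only wheel for Python 3.11 on Linux x86 directly from its URL.
python3 -m pip install "https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
# Verify the installation:
python3 -c "import tensorflow as tf; print(tf.__version__)"
```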
    diff --git a/site/en/install/source.md b/site/en/install/source.md index b2f8ee8cb0b..dc847f017e9 100644 --- a/site/en/install/source.md +++ b/site/en/install/source.md @@ -4,8 +4,8 @@ Build a TensorFlow *pip* package from source and install it on Ubuntu Linux and macOS. While the instructions might work for other systems, it is only tested and supported for Ubuntu and macOS. -Note: We already provide well-tested, pre-built -[TensorFlow packages](./pip.html) for Linux and macOS systems. +Note: Well-tested, pre-built [TensorFlow packages](./pip.md) for Linux and macOS +systems are already provided. ## Setup for Linux and macOS @@ -25,9 +25,6 @@ Install the following build tools to configure your development environment.

    Requires Xcode 9.2 or later.

    Install using the Homebrew package manager:

    -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
    -export PATH="/usr/local/opt/python/libexec/bin:$PATH"
    -# if you are on macOS 10.12 (Sierra) use `export PATH="/usr/local/bin:/usr/local/sbin:$PATH"`
     brew install python
     
    @@ -37,13 +34,12 @@ Install the TensorFlow *pip* package dependencies (if using a virtual environment, omit the `--user` argument):
    -pip install -U --user pip numpy wheel
    -pip install -U --user keras_preprocessing --no-deps
    +pip install -U --user pip
     
    Note: A `pip` version >19.0 is required to install the TensorFlow 2 `.whl` package. Additional required dependencies are listed in the -setup.py +setup.py.tpl file under `REQUIRED_PACKAGES`. ### Install Bazel @@ -54,32 +50,83 @@ Bazel and automatically downloads the correct Bazel version for TensorFlow. For ease of use, add Bazelisk as the `bazel` executable in your `PATH`. If Bazelisk is not available, you can manually -[install Bazel](https://docs.bazel.build/versions/master/install.html). Make -sure to install a supported Bazel version: any version between -`_TF_MIN_BAZEL_VERSION` and `_TF_MAX_BAZEL_VERSION` as specified in -`tensorflow/configure.py`. +[install Bazel](https://bazel.build/install). Make +sure to install the correct Bazel version from TensorFlow's +[.bazelversion](https://github.com/tensorflow/tensorflow/blob/master/.bazelversion) +file. + +### Install Clang (recommended, Linux only) + +Clang is a C/C++/Objective-C compiler that is compiled in C++ based on LLVM. It +is the default compiler to build TensorFlow starting with TensorFlow 2.13. The +current supported version is LLVM/Clang 17. + +[LLVM Debian/Ubuntu nightly packages](https://apt.llvm.org) provide an automatic +installation script and packages for manual installation on Linux. Make sure you +run the following command if you manually add llvm apt repository to your +package sources: + +
    +sudo apt-get update && sudo apt-get install -y llvm-17 clang-17
    +
+ +In this case, `/usr/lib/llvm-17/bin/clang` is the actual path to clang. + +Alternatively, you can download and unpack the pre-built +[Clang + LLVM 17](https://github.com/llvm/llvm-project/releases/tag/llvmorg-17.0.2). + +Below is an example of the steps you can take to set up the downloaded Clang + LLVM +17 binaries on Debian/Ubuntu operating systems: + +1. Change to the desired destination directory: `cd ` + +1. Download and extract an archive file (suitable for your architecture): +
    +    wget https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.2/clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz
    +    
    +    tar -xvf clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz
    +    
    +    
    + +1. Copy the extracted contents (directories and files) to `/usr` (you may need + sudo permissions, and the correct directory may vary by distribution). This + effectively installs Clang and LLVM, and adds it to the path. You should not + have to replace anything, unless you have a previous installation, in which + case you should replace the files: +
    +    cp -r clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04/* /usr
    +    
    + +1. Check the obtained Clang + LLVM 17 binaries version: +
    +    clang --version
    +    
    + +1. Now that `/usr/bin/clang` is the actual path to your new clang. You can run + the `./configure` script or manually set environment variables `CC` and + `BAZEL_COMPILER` to this path. ### Install GPU support (optional, Linux only) There is *no* GPU support for macOS. -Read the [GPU support](./gpu.md) guide to install the drivers and additional +Read the [GPU support](./pip.md) guide to install the drivers and additional software required to run TensorFlow on a GPU. Note: It is easier to set up one of TensorFlow's GPU-enabled [Docker images](#docker_linux_builds). ### Download the TensorFlow source code -Use [Git](https://git-scm.com/){:.external} to clone the -[TensorFlow repository](https://github.com/tensorflow/tensorflow){:.external}: +Use [Git](https://git-scm.com/) to clone the +[TensorFlow repository](https://github.com/tensorflow/tensorflow):
     git clone https://github.com/tensorflow/tensorflow.git
     cd tensorflow
     
    -The repo defaults to the `master` development branch. You can also checkout a -[release branch](https://github.com/tensorflow/tensorflow/releases){:.external} +The repo defaults to the `master` development branch. You can also check out a +[release branch](https://github.com/tensorflow/tensorflow/releases) to build:
    @@ -89,16 +136,21 @@ git checkout branch_name  # r2.2, r2.3, etc.
     
     ## Configure the build
     
    -Configure your system build by running the `./configure` at the root of your
    -TensorFlow source tree. This script prompts you for the location of TensorFlow
    -dependencies and asks for additional build configuration options (compiler
    -flags, for example).
    +TensorFlow builds are configured by the `.bazelrc` file in the repository's
    +root directory. The `./configure` or `./configure.py` scripts can be used to
    +adjust common settings.
    +
    +Please run the `./configure` script from the repository's root directory. This
+script will prompt you for the location of TensorFlow dependencies and ask for
    +additional build configuration options (compiler flags, for example). Refer to
    +the _Sample session_ section for details.
     
     
     ./configure
     
    -If using a virtual environment, `python configure.py` prioritizes paths +There is also a python version of this script, `./configure.py`. If using a +virtual environment, `python configure.py` prioritizes paths within the environment, whereas `./configure` prioritizes paths outside the environment. In both cases you can change the default. @@ -111,65 +163,47 @@ session may differ):

    View sample configuration session

     ./configure
    -You have bazel 3.0.0 installed.
    -Please specify the location of python. [Default is /usr/bin/python3]: 
    +You have bazel 6.1.0 installed.
    +Please specify the location of python. [Default is /Library/Frameworks/Python.framework/Versions/3.9/bin/python3]: 
     
     
     Found possible Python library paths:
    -  /usr/lib/python3/dist-packages
    -  /usr/local/lib/python3.6/dist-packages
    -Please input the desired Python library path to use.  Default is [/usr/lib/python3/dist-packages]
    -
    -Do you wish to build TensorFlow with OpenCL SYCL support? [y/N]: 
    -No OpenCL SYCL support will be enabled for TensorFlow.
    +  /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages
    +Please input the desired Python library path to use.  Default is [/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages]
     
    -Do you wish to build TensorFlow with ROCm support? [y/N]: 
    +Do you wish to build TensorFlow with ROCm support? [y/N]:
     No ROCm support will be enabled for TensorFlow.
     
    -Do you wish to build TensorFlow with CUDA support? [y/N]: Y
    -CUDA support will be enabled for TensorFlow.
    -
    -Do you wish to build TensorFlow with TensorRT support? [y/N]: 
    -No TensorRT support will be enabled for TensorFlow.
    -
    -Found CUDA 10.1 in:
    -    /usr/local/cuda-10.1/targets/x86_64-linux/lib
    -    /usr/local/cuda-10.1/targets/x86_64-linux/include
    -Found cuDNN 7 in:
    -    /usr/lib/x86_64-linux-gnu
    -    /usr/include
    -
    -
    -Please specify a list of comma-separated CUDA compute capabilities you want to build with.
    -You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus. Each capability can be specified as "x.y" or "compute_xy" to include both virtual and binary GPU code, or as "sm_xy" to only include the binary code.
    -Please note that each additional compute capability significantly increases your build time and binary size, and that TensorFlow only supports compute capabilities >= 3.5 [Default is: 3.5,7.0]: 6.1
    -
    +Do you wish to build TensorFlow with CUDA support? [y/N]:
    +No CUDA support will be enabled for TensorFlow.
     
    -Do you want to use clang as CUDA compiler? [y/N]: 
    -nvcc will be used as CUDA compiler.
    +Do you want to use Clang to build TensorFlow? [Y/n]:
    +Clang will be used to compile TensorFlow.
     
    -Please specify which gcc should be used by nvcc as the host compiler. [Default is /usr/bin/gcc]: 
    +Please specify the path to clang executable. [Default is /usr/lib/llvm-16/bin/clang]:
     
    +You have Clang 16.0.4 installed.
     
    -Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -march=native -Wno-sign-compare]: 
    +Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -Wno-sign-compare]:
     
     
    -Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]: 
    +Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]: n
     Not configuring the WORKSPACE for Android builds.
     
    +Do you wish to build TensorFlow with iOS support? [y/N]: n
    +No iOS support will be enabled for TensorFlow.
    +
     Preconfigured Bazel build configs. You can use any of the below by adding "--config=<>" to your build command. See .bazelrc for more details.
     	--config=mkl         	# Build with MKL support.
    +	--config=mkl_aarch64 	# Build with oneDNN and Compute Library for the Arm Architecture (ACL).
     	--config=monolithic  	# Config for mostly static monolithic build.
    -	--config=ngraph      	# Build with Intel nGraph support.
     	--config=numa        	# Build with NUMA support.
     	--config=dynamic_kernels	# (Experimental) Build kernels into separate shared objects.
    -	--config=v2          	# Build TensorFlow 2.x instead of 1.x.
    +	--config=v1          	# Build with TensorFlow 1 API instead of TF 2 API.
     Preconfigured Bazel build configs to DISABLE default on features:
    -	--config=noaws       	# Disable AWS S3 filesystem support.
     	--config=nogcp       	# Disable GCP support.
    -	--config=nohdfs      	# Disable HDFS support.
     	--config=nonccl      	# Disable NVIDIA NCCL support.
    -Configuration finished
    +
     
    @@ -177,7 +211,14 @@ Configuration finished #### GPU support -For [GPU support](./gpu.md), set `cuda=Y` during configuration and specify the +##### from v.2.18.0 +For [GPU support](./pip.md), set `cuda=Y` during configuration and specify the +versions of CUDA and cuDNN if required. Bazel will download CUDA and CUDNN +packages automatically or point to CUDA/CUDNN/NCCL redistributions on local file +system if required. + +##### before v.2.18.0 +For [GPU support](./pip.md), set `cuda=Y` during configuration and specify the versions of CUDA and cuDNN. If your system has multiple versions of CUDA or cuDNN installed, explicitly set the version instead of relying on the default. `./configure` creates symbolic links to your system's CUDA libraries—so if you @@ -188,8 +229,8 @@ building. For compilation optimization flags, the default (`-march=native`) optimizes the generated code for your machine's CPU type. However, if building TensorFlow for -a different CPU type, consider a more specific optimization flag. See the -[GCC manual](https://gcc.gnu.org/onlinedocs/gcc-4.5.3/gcc/i386-and-x86_002d64-Options.html){:.external} +a different CPU type, consider a more specific optimization flag. Check the +[GCC manual](https://gcc.gnu.org/onlinedocs/gcc-4.5.3/gcc/i386-and-x86_002d64-Options.html) for examples. #### Preconfigured configurations @@ -201,81 +242,55 @@ There are some preconfigured build configs available that can be added to the [CONTRIBUTING.md](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) for details. * `--config=mkl` —Support for the - [Intel® MKL-DNN](https://github.com/intel/mkl-dnn){:.external}. + [Intel® MKL-DNN](https://github.com/intel/mkl-dnn). * `--config=monolithic` —Configuration for a mostly static, monolithic build. -* `--config=v1` —Build TensorFlow 1.x instead of 2.x. - -Note: Starting with TensorFlow 1.6, binaries use AVX instructions which may not -run on older CPUs. - - -## Build the pip package -### TensorFlow 2.x -[Install Bazel](https://docs.bazel.build/versions/master/install.html) and use -`bazel build` to create the TensorFlow 2.x package with *CPU-only* support: +## Build and install the pip package -
    -bazel build [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    - -Note: GPU support can be enabled with `cuda=Y` during the `./configure` stage. - -### GPU support - -To build a TensorFlow package builder with GPU support: - -
    -bazel build --config=cuda [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    - -### TensorFlow 1.x - -To build an older TensorFlow 1.x package, use the `--config=v1` option: - -
    -bazel build --config=v1 [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    +#### Bazel build options -### Bazel build options - -See the Bazel [command-line reference](https://docs.bazel.build/versions/master/command-line-reference.html) +Refer to the Bazel +[command-line reference](https://bazel.build/reference/command-line-reference) for -[build options](https://docs.bazel.build/versions/master/command-line-reference.html#build-options). +[build options](https://bazel.build/reference/command-line-reference#build-options). Building TensorFlow from source can use a lot of RAM. If your system is memory-constrained, limit Bazel's RAM usage with: `--local_ram_resources=2048`. -The [official TensorFlow packages](./pip.html) are built with a GCC 7.3 -toolchain that complies with the manylinux2010 package standard. - -For GCC 5 and later, compatibility with the older ABI can be built using: -`--cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"`. ABI compatibility ensures that custom -ops built against the official TensorFlow package continue to work with the -GCC 5 built package. +The [official TensorFlow packages](./pip.md) are built with a Clang toolchain +that complies with the manylinux2014 package standard. ### Build the package -The `bazel build` command creates an executable named `build_pip_package`—this -is the program that builds the `pip` package. Run the executable as shown -below to build a `.whl` package in the `/tmp/tensorflow_pkg` directory. +To build pip package, you need to specify `--repo_env=WHEEL_NAME` flag. +depending on the provided name, package will be created, e.g: -To build from a release branch: +To build tensorflow CPU package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu
    +
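If the build fails because the machine runs out of memory, here is a minimal sketch of the same CPU build with Bazel's RAM capped, using the `--local_ram_resources=2048` figure mentioned in the build-options note above:

```bash
# CPU wheel build with Bazel's RAM usage limited (value in MB; adjust for your machine).
bazel build //tensorflow/tools/pip_package:wheel \
  --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu \
  --local_ram_resources=2048
```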
    +To build tensorflow GPU package:
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow --config=cuda --config=cuda_wheel
     
    -To build from master, use `--nightly_flag` to get the right dependencies: +To build tensorflow TPU package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_tpu --config=tpu
    +
    +To build nightly package, set `tf_nightly` instead of `tensorflow`, e.g. +to build CPU nightly package:
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package --nightly_flag /tmp/tensorflow_pkg
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tf_nightly_cpu
     
-Although it is possible to build both CUDA and non-CUDA configurations under the -same source tree, it's recommended to run `bazel clean` when switching between -these two configurations in the same source tree. +As a result, the generated wheel will be located in +
    +bazel-bin/tensorflow/tools/pip_package/wheel_house/
    +
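For instance, a quick sketch of listing that directory to find the exact wheel filename before installing (the filename varies by TensorFlow version and platform):

```bash
ls bazel-bin/tensorflow/tools/pip_package/wheel_house/
```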
    ### Install the package @@ -283,7 +298,7 @@ The filename of the generated `.whl` file depends on the TensorFlow version and your platform. Use `pip install` to install the package, for example:
    -pip install /tmp/tensorflow_pkg/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Success: TensorFlow is now installed. @@ -293,17 +308,17 @@ Success: TensorFlow is now installed. TensorFlow's Docker development images are an easy way to set up an environment to build Linux packages from source. These images already contain the source -code and dependencies required to build TensorFlow. See the TensorFlow -[Docker guide](./docker.md) for installation and the -[list of available image tags](https://hub.docker.com/r/tensorflow/tensorflow/tags/){:.external}. +code and dependencies required to build TensorFlow. Go to the TensorFlow +[Docker guide](./docker.md) for installation instructions and the +[list of available image tags](https://hub.docker.com/r/tensorflow/tensorflow/tags/). ### CPU-only The following example uses the `:devel` image to build a CPU-only package from -the latest TensorFlow source code. See the [Docker guide](./docker.md) for +the latest TensorFlow source code. Check the [Docker guide](./docker.md) for available TensorFlow `-devel` tags. -Download the latest development image and start a Docker container that we'll +Download the latest development image and start a Docker container that you'll use to build the *pip* package:
    @@ -331,20 +346,20 @@ docker run -it -w /tensorflow -v /path/to/tensorflow:/tensorflow -v $
     With the source tree set up, build the TensorFlow package within the container's
     virtual environment:
     
    -1.  Configure the build—this prompts the user to answer build configuration
    -    questions.
    -2.  Build the tool used to create the *pip* package.
    -3.  Run the tool to create the *pip* package.
    -4.  Adjust the ownership permissions of the file for outside the container.
    +1.  Optional: Configure the build—this prompts the user to answer build
    +    configuration questions.
    +2.  Build the *pip* package.
    +3.  Adjust the ownership permissions of the file for outside the container.
     
     
    -./configure  # answer prompts or use defaults
    -
    -bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
    -
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /mnt  # create package
    -
    -chown $HOST_PERMS /mnt/tensorflow-version-tags.whl
    +./configure  # if necessary
    +
    +
    +bazel build //tensorflow/tools/pip_package:wheel \
    +--repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu --config=opt
    +
+
    +chown $HOST_PERMS bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Install and verify the package within the container: @@ -352,7 +367,7 @@ Install and verify the package within the container:
     pip uninstall tensorflow  # remove current version
     
    -pip install /mnt/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     cd /tmp  # don't import from source directory
     python -c "import tensorflow as tf; print(tf.__version__)"
     
    @@ -365,12 +380,15 @@ On your host machine, the TensorFlow *pip* package is in the current directory ### GPU support +Note: Starting from Tensorflow v.2.18.0 the wheels can be built from +source on a machine without GPUs and without NVIDIA driver installed. + Docker is the easiest way to build GPU support for TensorFlow since the *host* machine only requires the -[NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external} -(the *NVIDIA® CUDA® Toolkit* doesn't have to be installed). See the -[GPU support guide](./gpu.md) and the TensorFlow [Docker guide](./docker.md) to -set up [nvidia-docker](https://github.com/NVIDIA/nvidia-docker){:.external} +[NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver) +(the *NVIDIA® CUDA® Toolkit* doesn't have to be installed). Refer to the +[GPU support guide](./pip.md) and the TensorFlow [Docker guide](./docker.md) to +set up [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) (Linux only). The following example downloads the TensorFlow `:devel-gpu` image and uses @@ -388,13 +406,15 @@ Then, within the container's virtual environment, build the TensorFlow package with GPU support:
    -./configure  # answer prompts or use defaults
    -
    -bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
    +./configure  # if necessary
     
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /mnt  # create package
    +
    +bazel build //tensorflow/tools/pip_package:wheel \
    +--repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow --config=cuda \
    +--config=cuda_wheel --config=opt
    +
     
    -chown $HOST_PERMS /mnt/tensorflow-version-tags.whl
    +chown $HOST_PERMS bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Install and verify the package within the container and check for a GPU: @@ -402,7 +422,7 @@ Install and verify the package within the container and check for a GPU:
     pip uninstall tensorflow  # remove current version
     
    -pip install /mnt/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     cd /tmp  # don't import from source directory
     python -c "import tensorflow as tf; print(\"Num GPUs Available: \", len(tf.config.list_physical_devices('GPU')))"
     
    @@ -419,6 +439,20 @@ Success: TensorFlow is now installed. + + + + + + + + + + + + + + @@ -448,6 +482,20 @@ Success: TensorFlow is now installed.
| Version | Python version | Compiler | Build tools |
| ------- | -------------- | -------- | ----------- |
| tensorflow-2.20.0 | 3.9-3.13 | Clang 18.1.8 | Bazel 7.4.1 |
| tensorflow-2.19.0 | 3.9-3.12 | Clang 18.1.8 | Bazel 6.5.0 |
| tensorflow-2.18.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.17.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.16.1 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang 16.0.0 | Bazel 5.3.0 |
| tensorflow-2.12.0 | 3.8-3.11 | GCC 9.3.1 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | GCC 7.3.1 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
| tensorflow-2.5.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
| tensorflow-2.4.0 | 3.6-3.8 | GCC 7.3.1 | Bazel 3.1.0 |
    + + + + + + + + + + + + + + @@ -479,6 +527,16 @@ Success: TensorFlow is now installed.
| Version | Python version | Compiler | Build tools | cuDNN | CUDA |
| ------- | -------------- | -------- | ----------- | ----- | ---- |
| tensorflow-2.20.0 | 3.9-3.13 | Clang 18.1.8 | Bazel 7.4.1 | 9.3 | 12.5 |
| tensorflow-2.19.0 | 3.9-3.12 | Clang 18.1.8 | Bazel 6.5.0 | 9.3 | 12.5 |
| tensorflow-2.18.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 9.3 | 12.5 |
| tensorflow-2.17.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 8.9 | 12.3 |
| tensorflow-2.16.1 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 8.9 | 12.3 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 | 8.9 | 12.2 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 | 8.7 | 11.8 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang 16.0.0 | Bazel 5.3.0 | 8.6 | 11.8 |
| tensorflow-2.12.0 | 3.8-3.11 | GCC 9.3.1 | Bazel 5.3.0 | 8.6 | 11.8 |
| tensorflow-2.11.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.3.0 | 8.1 | 11.2 |
| tensorflow-2.10.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.1.1 | 8.1 | 11.2 |
| tensorflow-2.9.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.0.0 | 8.1 | 11.2 |
| tensorflow-2.8.0 | 3.7-3.10 | GCC 7.3.1 | Bazel 4.2.1 | 8.1 | 11.2 |
| tensorflow-2.7.0 | 3.7-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
| tensorflow-2.6.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
| tensorflow-2.5.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
| tensorflow-2.4.0 | 3.6-3.8 | GCC 7.3.1 | Bazel 3.1.0 | 8.0 | 11.0 |
    + + + + + + + + + + diff --git a/site/en/install/source_windows.md b/site/en/install/source_windows.md index cff252e0b9e..efc0f7a9286 100644 --- a/site/en/install/source_windows.md +++ b/site/en/install/source_windows.md @@ -1,9 +1,9 @@ # Build from source on Windows -Build a TensorFlow *pip* package from source and install it on Windows. +Build a TensorFlow *pip* package from the source and install it on Windows. Note: We already provide well-tested, pre-built -[TensorFlow packages](./pip.html) for Windows systems. +[TensorFlow packages](./pip.md) for Windows systems. ## Setup for Windows @@ -13,16 +13,16 @@ environment. ### Install Python and the TensorFlow package dependencies Install a -[Python 3.6.x 64-bit release for Windows](https://www.python.org/downloads/windows/){:.external}. +[Python 3.9+ 64-bit release for Windows](https://www.python.org/downloads/windows/). Select *pip* as an optional feature and add it to your `%PATH%` environmental variable. Install the TensorFlow *pip* package dependencies:
    -pip3 install six numpy wheel
    -pip3 install keras_applications==1.0.6 --no-deps
    -pip3 install keras_preprocessing==1.0.5 --no-deps
    +pip3 install -U pip
    +pip3 install -U six numpy wheel packaging
    +pip3 install -U keras_preprocessing --no-deps
     
    The dependencies are listed in the @@ -42,38 +42,53 @@ Add the location of the Bazel executable to your `%PATH%` environment variable. ### Install MSYS2 -[Install MSYS2](https://www.msys2.org/){:.external} for the bin tools needed to +[Install MSYS2](https://www.msys2.org/) for the bin tools needed to build TensorFlow. If MSYS2 is installed to `C:\msys64`, add `C:\msys64\usr\bin` to your `%PATH%` environment variable. Then, using `cmd.exe`, run:
    +pacman -Syu (requires a console restart)
     pacman -S git patch unzip
    +pacman -S git patch unzip rsync
     
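If `C:\msys64\usr\bin` is not already on your path, here is a minimal sketch of adding it for the current `cmd.exe` session (assuming the default `C:\msys64` install location mentioned above):

```
set PATH=C:\msys64\usr\bin;%PATH%
```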
    -### Install Visual C++ Build Tools 2019 +Note: Clang will be the preferred compiler to build TensorFlow CPU wheels on the Windows Platform starting with TF 2.16.1 The currently supported version is LLVM/clang 17.0.6. -Install the *Visual C++ build tools 2019*. This comes with *Visual Studio 2019* +Note: To build with Clang on Windows, it is required to install both LLVM and Visual C++ Build tools as although Windows uses clang-cl.exe as the compiler, Visual C++ Build tools are needed to link to Visual C++ libraries + +### Install Visual C++ Build Tools 2022 + +Install the *Visual C++ build tools 2022*. This comes with *Visual Studio Community 2022* but can be installed separately: 1. Go to the - [Visual Studio downloads](https://visualstudio.microsoft.com/downloads/){:.external}, -2. Select *Redistributables and Build Tools*, + [Visual Studio downloads](https://visualstudio.microsoft.com/downloads/), +2. Select *Tools for Visual Studio or Other Tools, Framework and Redistributables*, 3. Download and install: - - *Microsoft Visual C++ 2019 Redistributable* - - *Microsoft Build Tools 2019* + - *Build Tools for Visual Studio 2022* + - *Microsoft Visual C++ Redistributables for Visual Studio 2022* + +Note: TensorFlow is tested against the *Visual Studio Community 2022*. + +### Install LLVM + +1. Go to the + [LLVM downloads](https://github.com/llvm/llvm-project/releases/), +2. Download and install Windows-compatible LLVM in C:/Program Files/LLVM e.g., LLVM-17.0.6-win64.exe -Note: TensorFlow is tested against the *Visual Studio 2019*. ### Install GPU support (optional) See the Windows [GPU support](./gpu.md) guide to install the drivers and additional software required to run TensorFlow on a GPU. +Note: GPU support on native-Windows is only available for 2.10 or earlier versions, starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin + ### Download the TensorFlow source code -Use [Git](https://git-scm.com/){:.external} to clone the -[TensorFlow repository](https://github.com/tensorflow/tensorflow){:.external} +Use [Git](https://git-scm.com/) to clone the +[TensorFlow repository](https://github.com/tensorflow/tensorflow) (`git` is installed with MSYS2):
    @@ -81,8 +96,8 @@ Use [Git](https://git-scm.com/){:.external} to clone the
     cd tensorflow
     
    -The repo defaults to the `master` development branch. You can also checkout a -[release branch](https://github.com/tensorflow/tensorflow/releases){:.external} +The repo defaults to the `master` development branch. You can also check out a +[release branch](https://github.com/tensorflow/tensorflow/releases) to build:
    @@ -92,11 +107,38 @@ git checkout branch_name  # r1.9, r1.10, etc.
     Key Point: If you're having build problems on the latest development branch, try
     a release branch that is known to work.
     
+## Optional: Environment Variable Set Up
    +Run the following commands before running the build command to avoid issues with package creation:
+(If these variables were already set while installing the packages, you can skip this step.) Run `set` to check that all the paths were set correctly, or run `echo %VARIABLE_NAME%` (e.g., `echo %BAZEL_VC%`) to check the path set for a specific environment variable.
    +
    + Python path set up issue [tensorflow:issue#59943](https://github.com/tensorflow/tensorflow/issues/59943),[tensorflow:issue#9436](https://github.com/tensorflow/tensorflow/issues/9436),[tensorflow:issue#60083](https://github.com/tensorflow/tensorflow/issues/60083)
    +
    +
    +set PATH=path/to/python;%PATH% # [e.g. (C:/Python311)]
    +set PATH=path/to/python/Scripts;%PATH% # [e.g. (C:/Python311/Scripts)] 
    +set PYTHON_BIN_PATH=path/to/python_virtualenv/Scripts/python.exe 
+set PYTHON_LIB_PATH=path/to/python_virtualenv/lib/site-packages 
    +set PYTHON_DIRECTORY=path/to/python_virtualenv/Scripts 
    +
    + +Bazel/MSVC/CLANG path set up issue [tensorflow:issue#54578](https://github.com/tensorflow/tensorflow/issues/54578) + +
    +set BAZEL_SH=C:/msys64/usr/bin/bash.exe 
    +set BAZEL_VS=C:/Program Files/Microsoft Visual Studio/2022/BuildTools 
    +set BAZEL_VC=C:/Program Files/Microsoft Visual Studio/2022/BuildTools/VC 
    +set Bazel_LLVM=C:/Program Files/LLVM (explicitly tell Bazel where LLVM is installed by BAZEL_LLVM, needed while using CLANG)
    +set PATH=C:/Program Files/LLVM/bin;%PATH% (Optional, needed while using CLANG as Compiler)
    +
    + +## Optional: Configure the build -## Configure the build +TensorFlow builds are configured by the `.bazelrc` file in the repository's +root directory. The `./configure` or `./configure.py` scripts can be used to +adjust common settings. -Configure your system build by running the following at the root of your -TensorFlow source tree: +If you need to change the configuration, run the `./configure` script from +the repository's root directory.
     python ./configure.py
    @@ -111,92 +153,99 @@ differ):
     

    View sample configuration session

     python ./configure.py
    -Starting local Bazel server and connecting to it...
    -................
    -You have bazel 0.15.0 installed.
    -Please specify the location of python. [Default is C:\python36\python.exe]:
    +You have bazel 6.5.0 installed.
    +Please specify the location of python. [Default is C:\Python311\python.exe]:
     
     Found possible Python library paths:
    -  C:\python36\lib\site-packages
    -Please input the desired Python library path to use.  Default is [C:\python36\lib\site-packages]
    -
    -Do you wish to build TensorFlow with CUDA support? [y/N]: Y
    -CUDA support will be enabled for TensorFlow.
    +C:\Python311\lib\site-packages
    +Please input the desired Python library path to use.  Default is [C:\Python311\lib\site-packages]
     
    -Please specify the CUDA SDK version you want to use. [Leave empty to default to CUDA 9.0]:
    +Do you wish to build TensorFlow with ROCm support? [y/N]:
    +No ROCm support will be enabled for TensorFlow.
     
    -Please specify the location where CUDA 9.0 toolkit is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]:
    +WARNING: Cannot build with CUDA support on Windows.
    +Starting in TF 2.11, CUDA build is not supported for Windows. To use TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2.
     
    -Please specify the cuDNN version you want to use. [Leave empty to default to cuDNN 7.0]: 7.0
    +Do you want to use Clang to build TensorFlow? [Y/n]:
    +Add "--config=win_clang" to compile TensorFlow with CLANG.
     
    -Please specify the location where cuDNN 7 library is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]: C:\tools\cuda
    +Please specify the path to clang executable. [Default is C:\Program Files\LLVM\bin\clang.EXE]:
     
    -Please specify a list of comma-separated Cuda compute capabilities you want to build with.
    -You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus.
    -Please note that each additional compute capability significantly increases your build time and binary size. [Default is: 3.5,7.0]: 3.7
    +You have Clang 17.0.6 installed.
     
     Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is /arch:AVX]:
     
     Would you like to override eigen strong inline for some C++ compilation to reduce the compilation time? [Y/n]:
     Eigen strong inline overridden.
     
    -Configuration finished
    +Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]:
    +Not configuring the WORKSPACE for Android builds.
    +
    +Preconfigured Bazel build configs. You can use any of the below by adding "--config=<>" to your build command. See .bazelrc for more details.
    +        --config=mkl            # Build with MKL support.
    +        --config=mkl_aarch64    # Build with oneDNN and Compute Library for the Arm Architecture (ACL).
    +        --config=monolithic     # Config for mostly static monolithic build.
    +        --config=numa           # Build with NUMA support.
    +        --config=dynamic_kernels        # (Experimental) Build kernels into separate shared objects.
    +        --config=v1             # Build with TensorFlow 1 API instead of TF 2 API.
    +Preconfigured Bazel build configs to DISABLE default on features:
    +        --config=nogcp          # Disable GCP support.
    +        --config=nonccl         # Disable NVIDIA NCCL support.
     
    -### Configuration options - -For [GPU support](./gpu.md), specify the versions of CUDA and cuDNN. If your -system has multiple versions of CUDA or cuDNN installed, explicitly set the -version instead of relying on the default. `./configure.py` creates symbolic -links to your system's CUDA libraries—so if you update your CUDA library paths, -this configuration step must be run again before building. - -Note: Starting with TensorFlow 1.6, binaries use AVX instructions which may not -run on older CPUs. - +## Build and install the pip package -## Build the pip package +The pip package is built in two steps. A `bazel build` command creates a +"package-builder" program. You then run the package-builder to create the +package. -### TensorFlow 2.x +### Build the package-builder tensorflow:master repo has been updated to build 2.x by default. [Install Bazel](https://docs.bazel.build/versions/master/install.html) and use -`bazel build ` to create the TensorFlow package. +`bazel build ` to create the TensorFlow package-builder.
    -bazel build //tensorflow/tools/pip_package:build_pip_package
    +bazel build //tensorflow/tools/pip_package:wheel
     
    +#### CPU-only -### TensorFlow 1.x - -To build the 1.x version of TensorFlow from master, use -`bazel build --config=v1` to create a TensorFlow 1.x package. +Use `bazel` to make the TensorFlow package builder with CPU-only support: +##### Build with MSVC
    -bazel build --config=v1 //tensorflow/tools/pip_package:build_pip_package
    +bazel build --config=opt --repo_env=TF_PYTHON_VERSION=3.11 //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
     
-#### CPU-only - -Use `bazel` to make the TensorFlow package builder with CPU-only support: +##### Build with CLANG +Use `--config=win_clang` to build TensorFlow with the CLANG compiler:
    -bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
    +bazel build --config=win_clang --repo_env=TF_PYTHON_VERSION=3.11 //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
     
    #### GPU support +Note: GPU support on native-Windows is only available for 2.10 or earlier versions, starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin + To make the TensorFlow package builder with GPU support:
     bazel build --config=opt --config=cuda --define=no_tensorflow_py_deps=true //tensorflow/tools/pip_package:build_pip_package
     
    +Commands to clean the bazel cache to resolve errors due to invalid or outdated cached data, bazel clean with --expunge flag removes files permanently + +
    +bazel clean 
    +bazel clean --expunge  
    +
    + #### Bazel build options -Use this option when building to avoid issue with package creation: +Use this option when building to avoid issues with package creation: [tensorflow:issue#22390](https://github.com/tensorflow/tensorflow/issues/22390)
    @@ -215,30 +264,37 @@ to suppress nvcc warning messages.
     
     ### Build the package
     
    -The `bazel build` command creates an executable named `build_pip_package`—this
    -is the program that builds the `pip` package. For example, the following builds
    -a `.whl` package in the `C:/tmp/tensorflow_pkg` directory:
    +To build a pip package, you need to specify the --repo_env=WHEEL_NAME flag. 
    +Depending on the provided name, the package will be created. For example:
     
    -
    -bazel-bin\tensorflow\tools\pip_package\build_pip_package C:/tmp/tensorflow_pkg
    +To build tensorflow CPU package:
    +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
    +
    + +To build nightly package, set `tf_nightly` instead of `tensorflow`, e.g. +to build CPU nightly package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tf_nightly_cpu
    +
    + +As a result, generated wheel will be located in +
    +bazel-bin/tensorflow/tools/pip_package/wheel_house/
     
    -Although it is possible to build both CUDA and non-CUDA configs under the -same source tree, we recommend running `bazel clean` when switching between -these two configurations in the same source tree. ### Install the package The filename of the generated `.whl` file depends on the TensorFlow version and -your platform. Use `pip3 install` to install the package, for example: +your platform. Use `pip install` to install the package, for example: -
    -pip3 install C:/tmp/tensorflow_pkg/tensorflow-version-cp36-cp36m-win_amd64.whl
    +
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Success: TensorFlow is now installed. - ## Build using the MSYS shell TensorFlow can also be built using the MSYS shell. Make the changes listed @@ -260,12 +316,12 @@ considered a Unix absolute path since it starts with a slash.) Add the Bazel and Python installation directories to your `$PATH` environmental variable. If Bazel is installed to `C:\tools\bazel.exe`, and Python to -`C:\Python36\python.exe`, set your `PATH` with: +`C:\Python\python.exe`, set your `PATH` with:
     # Use Unix-style with ':' as separator
     export PATH="/c/tools:$PATH"
    -export PATH="/c/Python36:$PATH"
    +export PATH="/c/path/to/Python:$PATH"
     
    For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`: @@ -276,6 +332,8 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`: export PATH="/c/tools/cuda/bin:$PATH"
    +Note: Starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin + ## Tested build configurations @@ -283,6 +341,19 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`:
| Version | Python version | Compiler | Build tools |
| ------- | -------------- | -------- | ----------- |
| tensorflow-2.16.1 | 3.9-3.12 | Clang from Xcode 13.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang from Xcode 10.15 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang from Xcode 10.15 | Bazel 6.1.0 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang from Xcode 10.15 | Bazel 5.3.0 |
| tensorflow-2.12.0 | 3.8-3.11 | Clang from Xcode 10.15 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | Clang from Xcode 10.14 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | Clang from Xcode 10.11 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | Clang from Xcode 10.11 | Bazel 3.7.2 |
| tensorflow-2.5.0 | 3.6-3.9 | Clang from Xcode 10.11 | Bazel 3.7.2 |
| tensorflow-2.4.0 | 3.6-3.8 | Clang from Xcode 10.3 | Bazel 3.1.0 |
    + + + + + + + + + + + + + @@ -309,9 +380,14 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`:
| Version | Python version | Compiler | Build tools |
| ------- | -------------- | -------- | ----------- |
| tensorflow-2.20.0 | 3.9-3.13 | CLANG 18.1.4 | Bazel 7.4.1 |
| tensorflow-2.19.0 | 3.9-3.12 | CLANG 18.1.4 | Bazel 6.5.0 |
| tensorflow-2.18.0 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.17.0 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.16.1 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | MSVC 2019 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | MSVC 2019 | Bazel 6.1.0 |
| tensorflow-2.12.0 | 3.8-3.11 | MSVC 2019 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | MSVC 2019 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | MSVC 2019 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | MSVC 2019 | Bazel 3.7.2 |
| tensorflow-2.5.0 | 3.6-3.9 | MSVC 2019 | Bazel 3.7.2 |
| tensorflow-2.4.0 | 3.6-3.8 | MSVC 2019 | Bazel 3.1.0 |
### GPU

+Note: GPU support on native-Windows is only available for 2.10 or earlier versions, starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin
+
+
+
+
diff --git a/site/en/io/README.md b/site/en/io/README.md
deleted file mode 100644
index 24249b7ac03..00000000000
--- a/site/en/io/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow SIG IO
-
-These docs are available here: https://github.com/tensorflow/io/tree/master/docs
diff --git a/site/en/js/README.md b/site/en/js/README.md
deleted file mode 100644
index 5a3a34677b4..00000000000
--- a/site/en/js/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow.js
-
-These docs are available here: https://github.com/tensorflow/tfjs-website/tree/master/docs
diff --git a/site/en/lattice/README.md b/site/en/lattice/README.md
deleted file mode 100644
index 27ce3c8ce55..00000000000
--- a/site/en/lattice/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Lattice
-
-These docs are available here: https://github.com/tensorflow/lattice/tree/master/docs
diff --git a/site/en/lite/README.md b/site/en/lite/README.md
deleted file mode 100644
index 43c3249dc7b..00000000000
--- a/site/en/lite/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Lite
-
-These docs are available here:
-https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/g3doc
diff --git a/site/en/mlir/README.md b/site/en/mlir/README.md
deleted file mode 100644
index 614f9f693c8..00000000000
--- a/site/en/mlir/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow MLIR
-
-These docs are available here: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/mlir/g3doc
diff --git a/site/en/neural_structured_learning/README.md b/site/en/neural_structured_learning/README.md
deleted file mode 100644
index 85c905af170..00000000000
--- a/site/en/neural_structured_learning/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# Neural Structured Learning
-
-These docs are available here: https://github.com/tensorflow/neural-structured-learning/tree/master/g3doc
diff --git a/site/en/probability/README.md b/site/en/probability/README.md
deleted file mode 100644
index c17e5ba447b..00000000000
--- a/site/en/probability/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Probability
-
-These docs are available here: https://github.com/tensorflow/probability/tree/master/tensorflow_probability/g3doc
diff --git a/site/en/quantum/README.md b/site/en/quantum/README.md
deleted file mode 100644
index 78580b3dfd8..00000000000
--- a/site/en/quantum/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Welcome to the warp zone!
-
-# TensorFlow Quantum
-
-These docs are available here: https://github.com/tensorflow/quantum/tree/master/docs
diff --git a/site/en/r1/guide/autograph.ipynb b/site/en/r1/guide/autograph.ipynb
index 5d8d7c97999..64d631a52b3 100644
--- a/site/en/r1/guide/autograph.ipynb
+++ b/site/en/r1/guide/autograph.ipynb
@@ -66,7 +66,7 @@
       "source": [
         "> Note: This is an archived TF1 notebook. These are configured\n",
         "to run in TF2's \n",
-        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
+        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
         "but will run in TF1 as well. To use TF1 in Colab, use the\n",
         "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
         "magic."
       ]
     },
     {
@@ -78,7 +78,7 @@
       "id": "CydFK2CL7ZHA"
     },
     "source": [
-        "[AutoGraph](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/) helps you write complicated graph code using normal Python. Behind the scenes, AutoGraph automatically transforms your code into the equivalent [TensorFlow graph code](https://www.tensorflow.org/r1/guide/graphs). AutoGraph already supports much of the Python language, and that coverage continues to grow. For a list of supported Python language features, see the [Autograph capabilities and limitations](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/limitations.md)."
+        "[AutoGraph](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/autograph/) helps you write complicated graph code using normal Python. Behind the scenes, AutoGraph automatically transforms your code into the equivalent [TensorFlow graph code](https://www.tensorflow.org/r1/guide/graphs). AutoGraph already supports much of the Python language, and that coverage continues to grow. For a list of supported Python language features, see the [Autograph capabilities and limitations](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/autograph/g3doc/reference/limitations.md)."
       ]
     },
     {
@@ -241,7 +241,7 @@
       "id": "m-jWmsCmByyw"
     },
     "source": [
-        "AutoGraph supports common Python statements like `while`, `for`, `if`, `break`, and `return`, with support for nesting. Compare this function with the complicated graph verson displayed in the following code blocks:"
+        "AutoGraph supports common Python statements like `while`, `for`, `if`, `break`, and `return`, with support for nesting. Compare this function with the complicated graph version displayed in the following code blocks:"
       ]
     },
     {
diff --git a/site/en/r1/guide/checkpoints.md b/site/en/r1/guide/checkpoints.md
index 682631449d5..41544f52b25 100644
--- a/site/en/r1/guide/checkpoints.md
+++ b/site/en/r1/guide/checkpoints.md
@@ -56,8 +56,8 @@ Suppose you call the Estimator's `train` method. For example:
 
 ```python
 classifier.train(
-        input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100),
-        steps=200)
+        input_fn=lambda: train_input_fn(train_x, train_y, batch_size=100),
+        steps=200)
 ```
 
 As suggested by the following diagrams, the first call to `train`
diff --git a/site/en/r1/guide/custom_estimators.md b/site/en/r1/guide/custom_estimators.md
index 87dce26a0dc..7bbf3573909 100644
--- a/site/en/r1/guide/custom_estimators.md
+++ b/site/en/r1/guide/custom_estimators.md
@@ -592,10 +592,10 @@ function for custom Estimators; everything else is the same.
 For more details, be sure to check out:
 
 * The
-  [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/r1/mnist),
+  [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/r1.15/official/r1/mnist),
   which uses a custom estimator.
 * The TensorFlow
-  [official models repository](https://github.com/tensorflow/models/tree/master/official),
+  [official models repository](https://github.com/tensorflow/models/tree/r1.15/official),
   which contains more curated examples using custom estimators.
 * This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces
   TensorBoard.
diff --git a/site/en/r1/guide/datasets.md b/site/en/r1/guide/datasets.md
index b1ed1b6e113..d7c38bf2f92 100644
--- a/site/en/r1/guide/datasets.md
+++ b/site/en/r1/guide/datasets.md
@@ -437,7 +437,7 @@ dataset = dataset.batch(32)
 iterator = dataset.make_initializable_iterator()
 
 # You can feed the initializer with the appropriate filenames for the current
-# phase of execution, e.g. training vs. validation.
+# phase of execution, e.g., training vs. validation.
 
 # Initialize `iterator` with training data.
 training_filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"]
@@ -639,7 +639,7 @@ TODO(mrry): Add this section.
 The simplest form of batching stacks `n` consecutive elements of a dataset into
 a single element. The `Dataset.batch()` transformation does exactly this, with
 the same constraints as the `tf.stack()` operator, applied to each component
-of the elements: i.e. for each component *i*, all elements must have a tensor
+of the elements: i.e., for each component *i*, all elements must have a tensor
 of the exact same shape.
 
 ```python
diff --git a/site/en/r1/guide/debugger.md b/site/en/r1/guide/debugger.md
index 2b4b6497ec4..963765b97db 100644
--- a/site/en/r1/guide/debugger.md
+++ b/site/en/r1/guide/debugger.md
@@ -10,7 +10,7 @@ due to TensorFlow's computation-graph paradigm.
 This guide focuses on the command-line interface (CLI) of `tfdbg`. For guide on
 how to use the graphical user interface (GUI) of tfdbg, i.e., the
 **TensorBoard Debugger Plugin**, please visit
-[its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md).
+[its README](https://github.com/tensorflow/tensorboard/blob/r1.15/tensorboard/plugins/debugger/README.md).
 
 Note: The TensorFlow debugger uses a
 [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based text
@@ -35,7 +35,7 @@ TensorFlow.
 Later sections of this document describe how to use **tfdbg** with
 higher-level APIs of TensorFlow, including `tf.estimator`, `tf.keras` / `keras`
 and `tf.contrib.slim`. To *observe* such an issue, run the following command
 without the debugger (the source code can be found
-[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/v1/debug_mnist.py)):
+[here](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py)):
     python -m tensorflow.python.debug.examples.v1.debug_mnist
    @@ -64,7 +64,7 @@ numeric problem first surfaced.
     To add support for tfdbg in our example, all that is needed is to add the
     following lines of code and wrap the Session object with a debugger wrapper.
     This code is already added in
    -[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/v1/debug_mnist.py),
    +[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py),
     so you can activate tfdbg CLI with the `--debug` flag at the command line.
     
     ```python
    @@ -370,7 +370,7 @@ traceback of the node's construction.
     
     From the traceback, you can see that the op is constructed at the following
     line:
    -[`debug_mnist.py`](https://www.tensorflow.org/code/tensorflow/python/debug/examples/v1/debug_mnist.py):
    +[`debug_mnist.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py):
     
     ```python
     diff = y_ * tf.log(y)
    @@ -457,7 +457,7 @@ accuracy_score = classifier.evaluate(eval_input_fn,
     predict_results = classifier.predict(predict_input_fn, hooks=hooks)
     ```
     
    -[debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/v1/debug_tflearn_iris.py),
    +[debug_tflearn_iris.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_tflearn_iris.py),
     contains a full example of how to use the tfdbg with `Estimator`s. To run this
     example, do:
     
    @@ -501,7 +501,7 @@ TensorFlow backend. You just need to replace `tf.keras.backend` with
     ## Debugging tf-slim with TFDBG
     
     TFDBG supports debugging of training and evaluation with
    -[tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim).
    +[tf-slim](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/slim).
     As detailed below, training and evaluation require slightly different debugging
     workflows.
     
    @@ -605,7 +605,7 @@ The `watch_fn` argument accepts a `Callable` that allows you to configure what
     If your model code is written in C++ or other languages, you can also
     modify the `debug_options` field of `RunOptions` to generate debug dumps that
     can be inspected offline. See
    -[the proto definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/debug.proto)
    +[the proto definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/debug.proto)
     for more details.
     
     ### Debugging Remotely-Running Estimators
    @@ -648,7 +648,7 @@ python -m tensorflow.python.debug.cli.offline_analyzer \
            model, check out
     
        1. The profiling mode of tfdbg: `tfdbg> run -p`.
    -   2. [tfprof](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler)
    +   2. [tfprof](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/core/profiler)
           and other profiling tools for TensorFlow.
     
     **Q**: _How do I link tfdbg against my `Session` in Bazel? Why do I see an
    @@ -808,4 +808,4 @@ tensor dumps.
            and conditional breakpoints, and tying tensors to their
            graph-construction source code, all in the browser environment.
            To get started, please visit
    -       [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md).
    +       [its README](https://github.com/tensorflow/tensorboard/blob/r1.15/tensorboard/plugins/debugger/README.md).
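
The debugger guide edited above centers on wrapping a `Session` with tfdbg and attaching debug hooks to Estimators. A minimal sketch of that workflow, assuming the TF 1.x `tensorflow.python.debug` module the guide refers to (the toy graph here is illustrative, not from the guide):

```python
import tensorflow.compat.v1 as tf
from tensorflow.python import debug as tf_debug

tf.disable_eager_execution()

x = tf.placeholder(tf.float32, name="x")
y = tf.log(x, name="y")  # produces -inf/nan for x <= 0, the kind of issue tfdbg surfaces

sess = tf.Session()
# Wrap the session so every run() call drops into the tfdbg CLI.
sess = tf_debug.LocalCLIDebugWrapperSession(sess)
print(sess.run(y, feed_dict={x: 0.0}))

# For Estimators, the equivalent is attaching a debug hook, e.g.:
# hooks = [tf_debug.LocalCLIDebugHook()]
# classifier.train(input_fn=train_input_fn, hooks=hooks)
```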
    diff --git a/site/en/r1/guide/distribute_strategy.ipynb b/site/en/r1/guide/distribute_strategy.ipynb
    index f6d85912e16..4dd502d331b 100644
    --- a/site/en/r1/guide/distribute_strategy.ipynb
    +++ b/site/en/r1/guide/distribute_strategy.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -118,7 +118,7 @@
             "## Types of strategies\n",
             "`tf.distribute.Strategy` intends to cover a number of use cases along different axes. Some of these combinations are currently supported and others will be added in the future. Some of these axes are:\n",
             "\n",
    -        "* Syncronous vs asynchronous training: These are two common ways of distributing training with data parallelism. In sync training, all workers train over different slices of input data in sync, and aggregating gradients at each step. In async training, all workers are independently training over the input data and updating variables asynchronously. Typically sync training is supported via all-reduce and async through parameter server architecture.\n",
    +        "* Synchronous vs asynchronous training: These are two common ways of distributing training with data parallelism. In sync training, all workers train over different slices of input data in sync, and aggregating gradients at each step. In async training, all workers are independently training over the input data and updating variables asynchronously. Typically sync training is supported via all-reduce and async through parameter server architecture.\n",
             "* Hardware platform: Users may want to scale their training onto multiple GPUs on one machine, or multiple machines in a network (with 0 or more GPUs each), or on Cloud TPUs.\n",
             "\n",
             "In order to support these use cases, we have 4 strategies available. In the next section we will talk about which of these are supported in which scenarios in TF."
    @@ -223,7 +223,7 @@
             "id": "KY1nJHNkMl7b"
           },
           "source": [
    -        "This will create a `CentralStorageStrategy` instance which will use all visible GPUs and CPU. Update to variables on replicas will be aggragated before being applied to variables."
    +        "This will create a `CentralStorageStrategy` instance which will use all visible GPUs and CPU. Update to variables on replicas will be aggregated before being applied to variables."
           ]
         },
         {
    @@ -245,7 +245,7 @@
             "\n",
             "`tf.distribute.experimental.MultiWorkerMirroredStrategy` is very similar to `MirroredStrategy`. It implements synchronous distributed training across multiple workers, each with potentially multiple GPUs. Similar to `MirroredStrategy`, it creates copies of all variables in the model on each device across all workers.\n",
             "\n",
    -        "It uses [CollectiveOps](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/collective_ops.py) as the multi-worker all-reduce communication method used to keep variables in sync. A collective op is a single op in the TensorFlow graph which can automatically choose an all-reduce algorithm in the TensorFlow runtime according to hardware, network topology and tensor sizes.\n",
    +        "It uses [CollectiveOps](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/collective_ops.py) as the multi-worker all-reduce communication method used to keep variables in sync. A collective op is a single op in the TensorFlow graph which can automatically choose an all-reduce algorithm in the TensorFlow runtime according to hardware, network topology and tensor sizes.\n",
             "\n",
             "It also implements additional performance optimizations. For example, it includes a static optimization that converts multiple all-reductions on small tensors into fewer all-reductions on larger tensors. In addition, we are designing it to have a plugin architecture - so that in the future, users will be able to plugin algorithms that are better tuned for their hardware. Note that collective ops also implement other collective operations such as broadcast and all-gather.\n",
             "\n",
    @@ -371,7 +371,7 @@
             "id": "hQv1lm9UPDFy"
           },
           "source": [
    -        "So far we've talked about what are the different stategies available and how you can instantiate them. In the next few sections, we will talk about the different ways in which you can use them to distribute your training. We will show short code snippets in this guide and link off to full tutorials which you can run end to end."
    +        "So far we've talked about what are the different strategies available and how you can instantiate them. In the next few sections, we will talk about the different ways in which you can use them to distribute your training. We will show short code snippets in this guide and link off to full tutorials which you can run end to end."
           ]
         },
         {
    @@ -490,8 +490,8 @@
             "Here is a list of tutorials and examples that illustrate the above integration end to end with Keras:\n",
             "\n",
             "1. [Tutorial](../tutorials/distribute/keras.ipynb) to train MNIST with `MirroredStrategy`.\n",
    -        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/master/official/vision/image_classification/resnet_imagenet_main.py) training with ImageNet data using `MirroredStrategy`.\n",
    -        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/master/models/experimental/resnet50_keras/resnet50.py) trained with Imagenet data on Cloud TPus with `TPUStrategy`."
    +        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/r1.15/official/vision/image_classification/resnet_imagenet_main.py) training with ImageNet data using `MirroredStrategy`.\n",
    +        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/1.15/models/experimental/resnet50_keras/resnet50.py) trained with Imagenet data on Cloud TPus with `TPUStrategy`."
           ]
         },
         {
    @@ -595,9 +595,9 @@
             "### Examples and Tutorials\n",
             "Here are some examples that show end to end usage of various strategies with Estimator:\n",
             "\n",
    -        "1. [End to end example](https://github.com/tensorflow/ecosystem/tree/master/distribution_strategy) for multi worker training in tensorflow/ecosystem using Kuberentes templates. This example starts with a Keras model and converts it to an Estimator using the `tf.keras.estimator.model_to_estimator` API.\n",
    -        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/master/official/r1/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.\n",
    -        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/master/models/experimental/distribution_strategy/resnet_estimator.py) example with TPUStrategy."
    +        "1. [End to end example](https://github.com/tensorflow/ecosystem/tree/r1.15/distribution_strategy) for multi worker training in tensorflow/ecosystem using Kuberentes templates. This example starts with a Keras model and converts it to an Estimator using the `tf.keras.estimator.model_to_estimator` API.\n",
    +        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/r1.15/official/r1/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.\n",
    +        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/1.15/models/experimental/distribution_strategy/resnet_estimator.py) example with TPUStrategy."
           ]
         },
         {
    @@ -607,7 +607,7 @@
           },
           "source": [
             "## Using `tf.distribute.Strategy` with custom training loops\n",
    -        "As you've seen, using `tf.distrbute.Strategy` with high level APIs is only a couple lines of code change. With a little more effort, `tf.distrbute.Strategy` can also be used by other users who are not using these frameworks.\n",
    +        "As you've seen, using `tf.distribute.Strategy` with high level APIs is only a couple lines of code change. With a little more effort, `tf.distribute.Strategy` can also be used by other users who are not using these frameworks.\n",
             "\n",
             "TensorFlow is used for a wide variety of use cases and some users (such as researchers) require more flexibility and control over their training loops. This makes it hard for them to use the high level frameworks such as Estimator or Keras. For instance, someone using a GAN may want to take a different number of generator or discriminator steps each round. Similarly, the high level frameworks are not very suitable for Reinforcement Learning training. So these users will usually write their own training loops.\n",
             "\n",
    diff --git a/site/en/r1/guide/eager.ipynb b/site/en/r1/guide/eager.ipynb
    index 547e1b02977..f76acb4b702 100644
    --- a/site/en/r1/guide/eager.ipynb
    +++ b/site/en/r1/guide/eager.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -95,7 +95,7 @@
             "\n",
             "Eager execution supports most TensorFlow operations and GPU acceleration. For a\n",
             "collection of examples running in eager execution, see:\n",
    -        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples).\n",
    +        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples).\n",
             "\n",
             "Note: Some models may experience increased overhead with eager execution\n",
             "enabled. Performance improvements are ongoing, but please\n",
    @@ -702,7 +702,7 @@
           },
           "outputs": [],
           "source": [
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  with tf.device(\"gpu:0\"):\n",
             "    v = tf.Variable(tf.random_normal([1000, 1000]))\n",
             "    v = None  # v no longer takes up GPU memory"
    @@ -1116,7 +1116,7 @@
             "  print(\"CPU: {} secs\".format(measure(tf.random_normal(shape), steps)))\n",
             "\n",
             "# Run on GPU, if available:\n",
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  with tf.device(\"/gpu:0\"):\n",
             "    print(\"GPU: {} secs\".format(measure(tf.random_normal(shape), steps)))\n",
             "else:\n",
    @@ -1141,7 +1141,7 @@
           },
           "outputs": [],
           "source": [
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  x = tf.random_normal([10, 10])\n",
             "\n",
             "  x_gpu0 = x.gpu()\n",
    @@ -1160,7 +1160,7 @@
             "### Benchmarks\n",
             "\n",
             "For compute-heavy models, such as\n",
    -        "[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50)\n",
    +        "[ResNet50](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples/resnet50)\n",
             "training on a GPU, eager execution performance is comparable to graph execution.\n",
             "But this gap grows larger for models with less computation and there is work to\n",
             "be done for optimizing hot code paths for models with lots of small operations."
    @@ -1225,7 +1225,7 @@
             "production deployment. Use `tf.train.Checkpoint` to save and restore model\n",
             "variables, this allows movement between eager and graph execution environments.\n",
             "See the examples in:\n",
    -        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples).\n"
    +        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples).\n"
           ]
         },
         {
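
The eager-execution hunks above replace `tf.test.is_gpu_available()` with `tf.config.list_physical_devices('GPU')`. A small sketch of the updated device-placement check, in the same spirit as the notebook cells (assuming the archived notebook's `tensorflow.compat.v1` import):

```python
import tensorflow.compat.v1 as tf

tf.enable_eager_execution()

if tf.config.list_physical_devices('GPU'):
    with tf.device("gpu:0"):
        v = tf.Variable(tf.random_normal([1000, 1000]))
        v = None  # Dropping the reference frees the GPU memory.
else:
    print("No GPU available; running on CPU.")
```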
    diff --git a/site/en/r1/guide/extend/architecture.md b/site/en/r1/guide/extend/architecture.md
    index 1f2ac53066f..0753824e15e 100644
    --- a/site/en/r1/guide/extend/architecture.md
    +++ b/site/en/r1/guide/extend/architecture.md
    @@ -34,7 +34,7 @@ This document focuses on the following layers:
     *  **Client**:
        *  Defines the computation as a dataflow graph.
        *  Initiates graph execution using a [**session**](
    -      https://www.tensorflow.org/code/tensorflow/python/client/session.py).
    +      https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/client/session.py).
     *  **Distributed Master**
        *  Prunes a specific subgraph from the graph, as defined by the arguments
           to Session.run().
    @@ -144,8 +144,8 @@ The distributed master then ships the graph pieces to the distributed tasks.
     
     ### Code
     
    -*  [MasterService API definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/master_service.proto)
    -*  [Master interface](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/master_interface.h)
    +*  [MasterService API definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/master_service.proto)
    +*  [Master interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/master_interface.h)
     
     ## Worker Service
     
    @@ -178,7 +178,7 @@ For transfers between tasks, TensorFlow uses multiple protocols, including:
     
     We also have preliminary support for NVIDIA's NCCL library for multi-GPU
     communication, see:
    -[`tf.contrib.nccl`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/nccl_ops.py).
    +[`tf.contrib.nccl`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/nccl_ops.py).
     
     Partitioned Graph
     
    @@ -186,9 +186,9 @@ communication, see:
     
     ### Code
     
    -*   [WorkerService API definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/worker_service.proto)
    -*   [Worker interface](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/worker_interface.h)
    -*   [Remote rendezvous (for Send and Recv implementations)](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h)
    +*   [WorkerService API definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/worker_service.proto)
    +*   [Worker interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/worker_interface.h)
    +*   [Remote rendezvous (for Send and Recv implementations)](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h)
     
     ## Kernel Implementations
     
    @@ -199,7 +199,7 @@ Many of the operation kernels are implemented using Eigen::Tensor, which uses
     C++ templates to generate efficient parallel code for multicore CPUs and GPUs;
     however, we liberally use libraries like cuDNN where a more efficient kernel
     implementation is possible. We have also implemented
    -[quantization](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quantization.md), which enables
    +[quantization](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/lite/g3doc/performance/post_training_quantization.md), which enables
     faster inference in environments such as mobile devices and high-throughput
     datacenter applications, and use the
     [gemmlowp](https://github.com/google/gemmlowp) low-precision matrix library to
    @@ -215,4 +215,4 @@ experimental implementation of automatic kernel fusion.
     
     ### Code
     
    -*   [`OpKernel` interface](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)
    +*   [`OpKernel` interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_kernel.h)
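
The architecture guide above describes the client building a dataflow graph and a session handing it to the (local or distributed) master and workers. A toy client-side sketch of that flow; the remote target mentioned in the comment is a made-up placeholder:

```python
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Client: define the computation as a dataflow graph.
a = tf.placeholder(tf.float32, name="a")
b = tf.placeholder(tf.float32, name="b")
c = tf.multiply(a, b, name="c")

# Session.run() prunes the needed subgraph and dispatches it for execution.
# For a remote cluster you would pass a target such as "grpc://host:2222".
with tf.Session() as sess:
    print(sess.run(c, feed_dict={a: 3.0, b: 4.0}))  # 12.0
```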
    diff --git a/site/en/r1/guide/extend/bindings.md b/site/en/r1/guide/extend/bindings.md
    index 9c10e90840f..7daa2212106 100644
    --- a/site/en/r1/guide/extend/bindings.md
    +++ b/site/en/r1/guide/extend/bindings.md
    @@ -112,11 +112,11 @@ There are a few ways to get a list of the `OpDef`s for the registered ops:
         to interpret the `OpDef` messages.
     -   The C++ function `OpRegistry::Global()->GetRegisteredOps()` returns the same
         list of all registered `OpDef`s (defined in
    -    [`tensorflow/core/framework/op.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op.h)). This can be used to write the generator
    +    [`tensorflow/core/framework/op.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op.h)). This can be used to write the generator
         in C++ (particularly useful for languages that do not have protocol buffer
         support).
     -   The ASCII-serialized version of that list is periodically checked in to
    -    [`tensorflow/core/ops/ops.pbtxt`](https://www.tensorflow.org/code/tensorflow/core/ops/ops.pbtxt) by an automated process.
    +    [`tensorflow/core/ops/ops.pbtxt`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/ops.pbtxt) by an automated process.
     
     The `OpDef` specifies the following:
     
    @@ -159,7 +159,7 @@ between the generated code and the `OpDef`s checked into the repository, but is
     useful for languages where code is expected to be generated ahead of time like
     `go get` for Go and `cargo ops` for Rust. At the other end of the spectrum, for
     some languages the code could be generated dynamically from
    -[`tensorflow/core/ops/ops.pbtxt`](https://www.tensorflow.org/code/tensorflow/core/ops/ops.pbtxt).
    +[`tensorflow/core/ops/ops.pbtxt`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/ops.pbtxt).
     
     #### Handling Constants
     
    @@ -228,4 +228,4 @@ At this time, support for gradients, functions and control flow operations ("if"
     and "while") is not available in languages other than Python. This will be
     updated when the [C API] provides necessary support.
     
    -[C API]: https://www.tensorflow.org/code/tensorflow/c/c_api.h
    +[C API]: https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/c/c_api.h
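
The language-bindings guide above points at `ops.pbtxt`, the ASCII-serialized list of registered `OpDef`s. One way to inspect it from Python, assuming you have a TensorFlow source checkout providing that file (the path below is illustrative):

```python
from google.protobuf import text_format
from tensorflow.core.framework import op_def_pb2

op_list = op_def_pb2.OpList()
with open("tensorflow/core/ops/ops.pbtxt") as f:  # path inside a TensorFlow checkout
    text_format.Merge(f.read(), op_list)

print(len(op_list.op), "registered ops")
for op in op_list.op[:3]:
    print(op.name, [arg.name for arg in op.input_arg])
```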
    diff --git a/site/en/r1/guide/extend/filesystem.md b/site/en/r1/guide/extend/filesystem.md
    index 4d34c07102e..2d6ea0c4645 100644
    --- a/site/en/r1/guide/extend/filesystem.md
    +++ b/site/en/r1/guide/extend/filesystem.md
    @@ -54,7 +54,7 @@ To implement a custom filesystem plugin, you must do the following:
     ### The FileSystem interface
     
     The `FileSystem` interface is an abstract C++ interface defined in
    -[file_system.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/file_system.h).
    +[file_system.h](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/file_system.h).
     An implementation of the `FileSystem` interface should implement all relevant
     the methods defined by the interface. Implementing the interface requires
     defining operations such as creating `RandomAccessFile`, `WritableFile`, and
    @@ -70,26 +70,26 @@ involves calling `stat()` on the file and then returns the filesize as reported
     by the return of the stat object. Similarly, for the `HDFSFileSystem`
     implementation, these calls simply delegate to the `libHDFS` implementation of
     similar functionality, such as `hdfsDelete` for
    -[DeleteFile](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hadoop/hadoop_file_system.cc#L386).
    +[DeleteFile](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/hadoop/hadoop_file_system.cc#L386).
     
     We suggest looking through these code examples to get an idea of how different
     filesystem implementations call their existing libraries. Examples include:
     
     *   [POSIX
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/posix/posix_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/posix/posix_file_system.h)
     *   [HDFS
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hadoop/hadoop_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/hadoop/hadoop_file_system.h)
     *   [GCS
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/cloud/gcs_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/cloud/gcs_file_system.h)
     *   [S3
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/s3/s3_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/s3/s3_file_system.h)
     
     #### The File interfaces
     
     Beyond operations that allow you to query and manipulate files and directories
     in a filesystem, the `FileSystem` interface requires you to implement factories
     that return implementations of abstract objects such as the
    -[RandomAccessFile](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/file_system.h#L223),
    +[RandomAccessFile](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/file_system.h#L223),
     the `WritableFile`, so that TensorFlow code and read and write to files in that
     `FileSystem` implementation.
     
    @@ -224,7 +224,7 @@ it will use the `FooBarFileSystem` implementation.
     
     Next, you must build a shared object containing this implementation. An example
     of doing so using bazel's `cc_binary` rule can be found
    -[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD#L244),
    +[here](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD#L244),
     but you may use any build system to do so. See the section on [building the op library](../extend/op.md#build_the_op_library) for similar
     instructions.
     
    @@ -236,7 +236,7 @@ passing the path to the shared object. Calling this in your client program loads
     the shared object in the process, thus registering your implementation as
     available for any file operations going through the `FileSystem` interface. You
     can see
    -[test_file_system.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/file_system_test.py)
    +[test_file_system.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/framework/file_system_test.py)
     for an example.
     
     ## What goes through this interface?
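
The filesystem guide above ends by loading the plugin's shared object so the new scheme is registered with the `FileSystem` interface. A hedged sketch of that client-side step in TF 1.x, reusing the guide's `foobar://` scheme with a made-up library path:

```python
import tensorflow.compat.v1 as tf

# Loading the shared object registers the FooBarFileSystem as a side effect.
tf.load_file_system_library("./foobar_filesystem.so")  # hypothetical plugin path

# Anything routed through the FileSystem interface can now resolve the scheme.
print(tf.gfile.Exists("foobar://bucket/path/to/file"))
with tf.gfile.GFile("foobar://bucket/path/to/file") as f:
    print(f.read())
```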
    diff --git a/site/en/r1/guide/extend/formats.md b/site/en/r1/guide/extend/formats.md
    index 3b7b4aafbd6..bdebee5487d 100644
    --- a/site/en/r1/guide/extend/formats.md
    +++ b/site/en/r1/guide/extend/formats.md
    @@ -28,11 +28,11 @@ individual records in a file. There are several examples of "reader" datasets
     that are already built into TensorFlow:
     
     *   `tf.data.TFRecordDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     *   `tf.data.FixedLengthRecordDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     *   `tf.data.TextLineDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     
     Each of these implementations comprises three related classes:
     
    @@ -279,7 +279,7 @@ if __name__ == "__main__":
     ```
     
     You can see some examples of `Dataset` wrapper classes in
    -[`tensorflow/python/data/ops/dataset_ops.py`](https://www.tensorflow.org/code/tensorflow/python/data/ops/dataset_ops.py).
    +[`tensorflow/python/data/ops/dataset_ops.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/data/ops/dataset_ops.py).
     
     ## Writing an Op for a record format
     
    @@ -297,7 +297,7 @@ Examples of Ops useful for decoding records:
     
     Note that it can be useful to use multiple Ops to decode a particular record
     format.  For example, you may have an image saved as a string in
    -[a `tf.train.Example` protocol buffer](https://www.tensorflow.org/code/tensorflow/core/example/example.proto).
    +[a `tf.train.Example` protocol buffer](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto).
     Depending on the format of that image, you might take the corresponding output
     from a `tf.parse_single_example` op and call `tf.image.decode_jpeg`,
     `tf.image.decode_png`, or `tf.decode_raw`.  It is common to take the output
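
The record-formats guide above combines a reader dataset with decoding ops such as `tf.parse_single_example` and `tf.image.decode_jpeg`. A small sketch of that pipeline; the feature key and filename are placeholders:

```python
import tensorflow.compat.v1 as tf

def _parse_record(example_proto):
    # "image_raw" is a hypothetical feature key for the serialized JPEG bytes.
    features = {"image_raw": tf.FixedLenFeature([], tf.string)}
    parsed = tf.parse_single_example(example_proto, features)
    return tf.image.decode_jpeg(parsed["image_raw"])

dataset = tf.data.TFRecordDataset(["/var/data/images.tfrecord"])  # placeholder file
dataset = dataset.map(_parse_record)
```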
    diff --git a/site/en/r1/guide/extend/model_files.md b/site/en/r1/guide/extend/model_files.md
    index 30e73a5169e..e590fcf1f27 100644
    --- a/site/en/r1/guide/extend/model_files.md
    +++ b/site/en/r1/guide/extend/model_files.md
    @@ -28,7 +28,7 @@ by calling `as_graph_def()`, which returns a `GraphDef` object.
     
     The GraphDef class is an object created by the ProtoBuf library from the
     definition in
    -[tensorflow/core/framework/graph.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto). The protobuf tools parse
    +[tensorflow/core/framework/graph.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/graph.proto). The protobuf tools parse
     this text file, and generate the code to load, store, and manipulate graph
     definitions. If you see a standalone TensorFlow file representing a model, it's
     likely to contain a serialized version of one of these `GraphDef` objects
    @@ -87,7 +87,7 @@ for node in graph_def.node
     ```
     
     Each node is a `NodeDef` object, defined in
    -[tensorflow/core/framework/node_def.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/node_def.proto). These
    +[tensorflow/core/framework/node_def.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/node_def.proto). These
     are the fundamental building blocks of TensorFlow graphs, with each one defining
     a single operation along with its input connections. Here are the members of a
     `NodeDef`, and what they mean.
    @@ -107,7 +107,7 @@ This defines what operation to run, for example `"Add"`, `"MatMul"`, or
     `"Conv2D"`. When a graph is run, this op name is looked up in a registry to
     find an implementation. The registry is populated by calls to the
     `REGISTER_OP()` macro, like those in
    -[tensorflow/core/ops/nn_ops.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/nn_ops.cc).
    +[tensorflow/core/ops/nn_ops.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/nn_ops.cc).
     
     ### `input`
     
    @@ -133,7 +133,7 @@ size of filters for convolutions, or the values of constant ops. Because there
     can be so many different types of attribute values, from strings, to ints, to
     arrays of tensor values, there's a separate protobuf file defining the data
     structure that holds them, in
    -[tensorflow/core/framework/attr_value.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto).
    +[tensorflow/core/framework/attr_value.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/attr_value.proto).
     
     Each attribute has a unique name string, and the expected attributes are listed
     when the operation is defined. If an attribute isn't present in a node, but it
    @@ -151,7 +151,7 @@ the file format during training. Instead, they're held in separate checkpoint
     files, and there are `Variable` ops in the graph that load the latest values
     when they're initialized. It's often not very convenient to have separate files
     when you're deploying to production, so there's the
    -[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py) script that takes a graph definition and a set
    +[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/freeze_graph.py) script that takes a graph definition and a set
     of checkpoints and freezes them together into a single file.
     
     What this does is load the `GraphDef`, pull in the values for all the variables
    @@ -167,7 +167,7 @@ the most common problems is extracting and interpreting the weight values. A
     common way to store them, for example in graphs created by the freeze_graph
     script, is as `Const` ops containing the weights as `Tensors`. These are
     defined in
    -[tensorflow/core/framework/tensor.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto), and contain information
    +[tensorflow/core/framework/tensor.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto), and contain information
     about the size and type of the data, as well as the values themselves. In
     Python, you get a `TensorProto` object from a `NodeDef` representing a `Const`
     op by calling something like `some_node_def.attr['value'].tensor`.
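
The model-files guide above explains that frozen graphs store weights as `Const` nodes whose `value` attr holds a `TensorProto`. A short sketch of pulling those weights back out; the `.pb` path is a placeholder:

```python
import tensorflow.compat.v1 as tf
from tensorflow.core.framework import graph_pb2

graph_def = graph_pb2.GraphDef()
with tf.gfile.GFile("frozen_model.pb", "rb") as f:  # placeholder path
    graph_def.ParseFromString(f.read())

for node in graph_def.node:
    if node.op == "Const":
        # Convert the embedded TensorProto into a NumPy array.
        weights = tf.make_ndarray(node.attr["value"].tensor)
        print(node.name, weights.dtype, weights.shape)
```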
    diff --git a/site/en/r1/guide/extend/op.md b/site/en/r1/guide/extend/op.md
    index d006a6251d0..186d9c28c04 100644
    --- a/site/en/r1/guide/extend/op.md
    +++ b/site/en/r1/guide/extend/op.md
    @@ -47,7 +47,7 @@ To incorporate your custom op you'll need to:
         test the op in C++. If you define gradients, you can verify them with the
         Python `tf.test.compute_gradient_error`.
         See
    -    [`relu_op_test.py`](https://www.tensorflow.org/code/tensorflow/python/kernel_tests/relu_op_test.py) as
    +    [`relu_op_test.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/kernel_tests/relu_op_test.py) as
         an example that tests the forward functions of Relu-like operators and
         their gradients.
     
    @@ -155,17 +155,17 @@ REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp);
     >   Important: Instances of your OpKernel may be accessed concurrently.
     >   Your `Compute` method must be thread-safe. Guard any access to class
     >   members with a mutex. Or better yet, don't share state via class members!
    ->   Consider using a [`ResourceMgr`](https://www.tensorflow.org/code/tensorflow/core/framework/resource_mgr.h)
    +>   Consider using a [`ResourceMgr`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/resource_mgr.h)
     >   to keep track of op state.
     
     ### Multi-threaded CPU kernels
     
     To write a multi-threaded CPU kernel, the Shard function in
    -[`work_sharder.h`](https://www.tensorflow.org/code/tensorflow/core/util/work_sharder.h)
    +[`work_sharder.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/util/work_sharder.h)
     can be used. This function shards a computation function across the
     threads configured to be used for intra-op threading (see
     intra_op_parallelism_threads in
    -[`config.proto`](https://www.tensorflow.org/code/tensorflow/core/protobuf/config.proto)).
    +[`config.proto`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/config.proto)).
     
     ### GPU kernels
     
    @@ -348,12 +348,13 @@ g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFL
     On macOS, the additional flag "-undefined dynamic_lookup" is required when
     building the `.so` file.
     
    ->   Note on `gcc` version `>=5`: gcc uses the new C++
    ->   [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. The binary pip
    ->   packages available on the TensorFlow website are built with `gcc4` that uses
    ->   the older ABI. If you compile your op library with `gcc>=5`, add
    ->   `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to make the library
    ->   compatible with the older abi.
    +> Note on `gcc` version `>=5`: gcc uses the new C++
    +> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`.
    +> TensorFlow 2.8 and earlier were built with `gcc4` that uses the older ABI. If
    +> you are using these versions of TensorFlow and are trying to compile your op
    +> library with `gcc>=5`, add `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to
    +> make the library compatible with the older ABI. TensorFlow 2.9+ packages are
    +> compatible with the newer ABI by default.
     
     ### Compile the op using bazel (TensorFlow source installation)
     
    @@ -485,13 +486,13 @@ This asserts that the input is a vector, and returns having set the
     
     *   The `context`, which can either be an `OpKernelContext` or
         `OpKernelConstruction` pointer (see
    -    [`tensorflow/core/framework/op_kernel.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)),
    +    [`tensorflow/core/framework/op_kernel.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_kernel.h)),
         for its `SetStatus()` method.
     *   The condition.  For example, there are functions for validating the shape
         of a tensor in
    -    [`tensorflow/core/framework/tensor_shape.h`](https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.h)
    +    [`tensorflow/core/framework/tensor_shape.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.h)
     *   The error itself, which is represented by a `Status` object, see
    -    [`tensorflow/core/lib/core/status.h`](https://www.tensorflow.org/code/tensorflow/core/lib/core/status.h). A
    +    [`tensorflow/core/lib/core/status.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/lib/core/status.h). A
         `Status` has both a type (frequently `InvalidArgument`, but see the list of
         types) and a message.  Functions for constructing an error may be found in
         [`tensorflow/core/lib/core/errors.h`][validation-macros].
    @@ -632,7 +633,7 @@ define an attr with constraints, you can use the following ``s:
     
         The specific lists of types allowed by these are defined by the functions
         (like `NumberTypes()`) in
    -    [`tensorflow/core/framework/types.h`](https://www.tensorflow.org/code/tensorflow/core/framework/types.h).
    +    [`tensorflow/core/framework/types.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.h).
         In this example the attr `t` must be one of the numeric types:
     
         ```c++
    @@ -1179,7 +1180,7 @@ There are several ways to preserve backwards-compatibility.
        type into a list of varying types).
     
     The full list of safe and unsafe changes can be found in
    -[`tensorflow/core/framework/op_compatibility_test.cc`](https://www.tensorflow.org/code/tensorflow/core/framework/op_compatibility_test.cc).
    +[`tensorflow/core/framework/op_compatibility_test.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_compatibility_test.cc).
     If you cannot make your change to an operation backwards compatible, then create
     a new operation with a new name with the new semantics.
     
    @@ -1189,23 +1190,23 @@ callers.  The Python API may be kept compatible by careful changes in a
     hand-written Python wrapper, by keeping the old signature except possibly adding
     new optional arguments to the end.  Generally incompatible changes may only be
     made when TensorFlow changes major versions, and must conform to the
    -[`GraphDef` version semantics](../guide/version_compat.md#compatibility_of_graphs_and_checkpoints).
    +[`GraphDef` version semantics](../version_compat.md).
     
     ### GPU Support
     
     You can implement different OpKernels and register one for CPU and another for
     GPU, just like you can [register kernels for different types](#polymorphism).
     There are several examples of kernels with GPU support in
    -[`tensorflow/core/kernels/`](https://www.tensorflow.org/code/tensorflow/core/kernels/).
    +[`tensorflow/core/kernels/`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/).
     Notice some kernels have a CPU version in a `.cc` file, a GPU version in a file
     ending in `_gpu.cu.cc`, and some code shared in common in a `.h` file.
     
     For example, the `tf.pad` has
     everything but the GPU kernel in [`tensorflow/core/kernels/pad_op.cc`][pad_op].
     The GPU kernel is in
    -[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op_gpu.cu.cc),
    +[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op_gpu.cu.cc),
     and the shared code is a templated class defined in
    -[`tensorflow/core/kernels/pad_op.h`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.h).
    +[`tensorflow/core/kernels/pad_op.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op.h).
     We organize the code this way for two reasons: it allows you to share common
     code among the CPU and GPU implementations, and it puts the GPU implementation
     into a separate file so that it can be compiled only by the GPU compiler.
    @@ -1226,16 +1227,16 @@ kept on the CPU, add a `HostMemory()` call to the kernel registration, e.g.:
     #### Compiling the kernel for the GPU device
     
     Look at
    -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
    +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
     for an example that uses a CUDA kernel to implement an op. The
     `tf_custom_op_library` accepts a `gpu_srcs` argument in which the list of source
     files containing the CUDA kernels (`*.cu.cc` files) can be specified. For use
     with a binary installation of TensorFlow, the CUDA kernels have to be compiled
     with NVIDIA's `nvcc` compiler. Here is the sequence of commands you can use to
     compile the
    -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
    +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
     and
    -[cuda_op_kernel.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cc)
    +[cuda_op_kernel.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cc)
     into a single dynamically loadable library:
     
     ```bash
    @@ -1360,7 +1361,7 @@ be set to the first input's shape. If the output is selected by its index as in
     
     There are a number of common shape functions
     that apply to many ops, such as `shape_inference::UnchangedShape` which can be
    -found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows:
    +found in [common_shape_fns.h](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/common_shape_fns.h) and used as follows:
     
     ```c++
     REGISTER_OP("ZeroOut")
    @@ -1407,7 +1408,7 @@ provides access to the attributes of the op).
     
     Since shape inference is an optional feature, and the shapes of tensors may vary
     dynamically, shape functions must be robust to incomplete shape information for
    -any of the inputs. The `Merge` method in [`InferenceContext`](https://www.tensorflow.org/code/tensorflow/core/framework/shape_inference.h)
    +any of the inputs. The `Merge` method in [`InferenceContext`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/shape_inference.h)
     allows the caller to assert that two shapes are the same, even if either
     or both of them do not have complete information. Shape functions are defined
     for all of the core TensorFlow ops and provide many different usage examples.
    @@ -1432,7 +1433,7 @@ If you have a complicated shape function, you should consider adding a test for
     validating that various input shape combinations produce the expected output
     shape combinations.  You can see examples of how to write these tests in some
     our
    -[core ops tests](https://www.tensorflow.org/code/tensorflow/core/ops/array_ops_test.cc).
    +[core ops tests](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/array_ops_test.cc).
     (The syntax of `INFER_OK` and `INFER_ERROR` are a little cryptic, but try to be
     compact in representing input and output shape specifications in tests.  For
     now, see the surrounding comments in those tests to get a sense of the shape
    @@ -1445,20 +1446,20 @@ To build a `pip` package for your op, see the
     guide shows how to build custom ops from the TensorFlow pip package instead
     of building TensorFlow from source.
     
    -[core-array_ops]:https://www.tensorflow.org/code/tensorflow/core/ops/array_ops.cc
    -[python-user_ops]:https://www.tensorflow.org/code/tensorflow/python/user_ops/user_ops.py
    -[tf-kernels]:https://www.tensorflow.org/code/tensorflow/core/kernels/
    -[user_ops]:https://www.tensorflow.org/code/tensorflow/core/user_ops/
    -[pad_op]:https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.cc
    -[standard_ops-py]:https://www.tensorflow.org/code/tensorflow/python/ops/standard_ops.py
    -[standard_ops-cc]:https://www.tensorflow.org/code/tensorflow/cc/ops/standard_ops.h
    -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD
    -[validation-macros]:https://www.tensorflow.org/code/tensorflow/core/lib/core/errors.h
    -[op_def_builder]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.h
    -[register_types]:https://www.tensorflow.org/code/tensorflow/core/framework/register_types.h
    -[FinalizeAttr]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.cc
    -[DataTypeString]:https://www.tensorflow.org/code/tensorflow/core/framework/types.cc
    -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD
    -[types-proto]:https://www.tensorflow.org/code/tensorflow/core/framework/types.proto
    -[TensorShapeProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.proto
    -[TensorProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor.proto
    +[core-array_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/array_ops.cc
    +[python-user_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/user_ops/user_ops.py
    +[tf-kernels]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/
    +[user_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/user_ops/
    +[pad_op]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op.cc
    +[standard_ops-py]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/standard_ops.py
    +[standard_ops-cc]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/ops/standard_ops.h
    +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD
    +[validation-macros]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/lib/core/errors.h
    +[op_def_builder]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def_builder.h
    +[register_types]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/register_types.h
    +[FinalizeAttr]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def_builder.cc
    +[DataTypeString]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.cc
    +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD
    +[types-proto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.proto
    +[TensorShapeProto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.proto
    +[TensorProto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto
    diff --git a/site/en/r1/guide/feature_columns.md b/site/en/r1/guide/feature_columns.md
    index 5a4dfbbf46d..e4259f85e9f 100644
    --- a/site/en/r1/guide/feature_columns.md
    +++ b/site/en/r1/guide/feature_columns.md
    @@ -562,7 +562,7 @@ For more examples on feature columns, view the following:
     
 * The [Low Level Introduction](../guide/low_level_intro.md#feature_columns) demonstrates how
   to experiment directly with `feature_columns` using TensorFlow's low level APIs.
    -* The [Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +* The [Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
       solves a binary classification problem using `feature_columns` on a variety of
       input data types.
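
A minimal sketch of the feature-column pattern these guides describe, using the TF1 API; the column names and vocabulary values below are hypothetical:

```python
import tensorflow.compat.v1 as tf

fc = tf.feature_column

# Numeric feature used as-is; categorical feature one-hot encoded.
age = fc.numeric_column('age')
embark_town = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list(
        'embark_town', ['Southampton', 'Cherbourg', 'Queenstown']))

# Feature columns plug directly into canned Estimators.
estimator = tf.estimator.LinearClassifier(feature_columns=[age, embark_town])
```
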
     
    diff --git a/site/en/r1/guide/graph_viz.md b/site/en/r1/guide/graph_viz.md
    index 1965378e03e..1e3780e7928 100644
    --- a/site/en/r1/guide/graph_viz.md
    +++ b/site/en/r1/guide/graph_viz.md
    @@ -251,7 +251,7 @@ is a snippet from the train and test section of a modification of the
     [Estimators MNIST tutorial](../tutorials/estimators/cnn.md), in which we have
     recorded summaries and
     runtime statistics. See the
    -[Tensorboard](https://tensorflow.org/tensorboard)
+[TensorBoard documentation](https://tensorflow.org/tensorboard)
     for details on how to record summaries.
     Full source is [here](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py).
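
A minimal sketch of recording summaries plus runtime statistics for TensorBoard in TF1; the toy loss and log directory are placeholders:

```python
import tensorflow.compat.v1 as tf

x = tf.Variable(3.0)
loss = tf.square(x - 2.0)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

tf.summary.scalar('loss', loss)
merged = tf.summary.merge_all()

with tf.Session() as sess:
    writer = tf.summary.FileWriter('/tmp/logs', sess.graph)
    sess.run(tf.global_variables_initializer())
    # FULL_TRACE collects the runtime statistics shown in the graph visualizer.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    summary, _ = sess.run([merged, train_op],
                          options=run_options, run_metadata=run_metadata)
    writer.add_run_metadata(run_metadata, 'step_1')
    writer.add_summary(summary, 1)
    writer.close()
```
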
     
    diff --git a/site/en/r1/guide/keras.ipynb b/site/en/r1/guide/keras.ipynb
    index 08a778b60a5..3a0cd8e55c5 100644
    --- a/site/en/r1/guide/keras.ipynb
    +++ b/site/en/r1/guide/keras.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -1211,8 +1211,7 @@
         "colab": {
           "collapsed_sections": [],
           "name": "keras.ipynb",
    -      "provenance": [],
    -      "toc_visible": true
    +            "toc_visible": true
         },
         "kernelspec": {
           "display_name": "Python 3",
    diff --git a/site/en/r1/guide/performance/benchmarks.md b/site/en/r1/guide/performance/benchmarks.md
    index 8998c0723db..a56959ea416 100644
    --- a/site/en/r1/guide/performance/benchmarks.md
    +++ b/site/en/r1/guide/performance/benchmarks.md
    @@ -401,7 +401,7 @@ GPUs | InceptionV3 (batch size 32) | ResNet-50 (batch size 32)
     ## Methodology
     
     This
    -[script](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks)
    +[script](https://github.com/tensorflow/benchmarks/tree/r1.15/scripts/tf_cnn_benchmarks)
     was run on the various platforms to generate the above results.
     
     In order to create results that are as repeatable as possible, each test was run
    diff --git a/site/en/r1/guide/performance/overview.md b/site/en/r1/guide/performance/overview.md
    index af74f0f28c6..be7217f4b99 100644
    --- a/site/en/r1/guide/performance/overview.md
    +++ b/site/en/r1/guide/performance/overview.md
    @@ -19,9 +19,9 @@ Reading large numbers of small files significantly impacts I/O performance.
     One approach to get maximum I/O throughput is to preprocess input data into
     larger (~100MB) `TFRecord` files. For smaller data sets (200MB-1GB), the best
     approach is often to load the entire data set into memory. The document
    -[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/master/research/slim#downloading-and-converting-to-tfrecord-format)
    +[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/r1.15/research/slim#downloading-and-converting-to-tfrecord-format)
     includes information and scripts for creating `TFRecord`s, and this
    -[script](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py)
    +[script](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py)
     converts the CIFAR-10 dataset into `TFRecord`s.
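
For reference, writing records into a `TFRecord` file with the TF1 API looks roughly like the sketch below; the example data is made up:

```python
import tensorflow.compat.v1 as tf

# Hypothetical (image_bytes, label) pairs; a real pipeline reads files from disk.
examples = [(b'raw_image_0', 3), (b'raw_image_1', 7)]

with tf.python_io.TFRecordWriter('/tmp/train.tfrecord') as writer:
    for image_bytes, label in examples:
        example = tf.train.Example(features=tf.train.Features(feature={
            'image': tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[image_bytes])),
            'label': tf.train.Feature(
                int64_list=tf.train.Int64List(value=[label])),
        }))
        writer.write(example.SerializeToString())
```
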
     
     While feeding data using a `feed_dict` offers a high level of flexibility, in
    @@ -122,7 +122,7 @@ tf.Session(config=config)
     Intel® has added optimizations to TensorFlow for Intel® Xeon® and Intel® Xeon
     Phi™ through the use of the Intel® Math Kernel Library for Deep Neural Networks
     (Intel® MKL-DNN) optimized primitives. The optimizations also provide speedups
    -for the consumer line of processors, e.g. i5 and i7 Intel processors. The Intel
    +for the consumer line of processors, e.g., i5 and i7 Intel processors. The Intel
     published paper
     [TensorFlow* Optimizations on Modern Intel® Architecture](https://software.intel.com/en-us/articles/tensorflow-optimizations-on-modern-intel-architecture)
     contains additional details on the implementation.
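
A hedged sketch of the session configuration this section refers to; the thread counts are illustrative starting points and should be tuned per machine:

```python
import tensorflow.compat.v1 as tf

config = tf.ConfigProto()
config.intra_op_parallelism_threads = 8  # threads used within a single op
config.inter_op_parallelism_threads = 2  # ops that may execute in parallel
sess = tf.Session(config=config)
```
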
    @@ -255,7 +255,7 @@ bazel build -c opt --copt=-march="broadwell" --config=cuda //tensorflow/tools/pi
       a docker container, the data is not cached and the penalty is paid each time
       TensorFlow starts. The best practice is to include the
       [compute capabilities](http://developer.nvidia.com/cuda-gpus)
    -  of the GPUs that will be used, e.g. P100: 6.0, Titan X (Pascal): 6.1,
    +  of the GPUs that will be used, e.g., P100: 6.0, Titan X (Pascal): 6.1,
       Titan X (Maxwell): 5.2, and K80: 3.7.
     * Use a version of `gcc` that supports all of the optimizations of the target
       CPU. The recommended minimum gcc version is 4.8.3. On macOS, upgrade to the
    diff --git a/site/en/r1/guide/ragged_tensors.ipynb b/site/en/r1/guide/ragged_tensors.ipynb
    index 61bce66ecfb..289d29ce82e 100644
    --- a/site/en/r1/guide/ragged_tensors.ipynb
    +++ b/site/en/r1/guide/ragged_tensors.ipynb
    @@ -57,7 +57,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -1010,7 +1010,7 @@
             "    `tf.RaggedTensor.values`\n",
             "    and\n",
             "    `tf.RaggedTensor.row_splits`\n",
    -        "    properties, or row-paritioning methods such as `tf.RaggedTensor.row_lengths()`\n",
    +        "    properties, or row-partitioning methods such as `tf.RaggedTensor.row_lengths()`\n",
             "    and `tf.RaggedTensor.value_rowids()`."
           ]
         },
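
To make the row-partitioning properties mentioned above concrete, a short sketch with arbitrarily chosen values:

```python
import tensorflow.compat.v1 as tf
tf.enable_eager_execution()

rt = tf.ragged.constant([[3, 1, 4], [], [5, 9], [2]])
print(rt.values)          # flat values: [3 1 4 5 9 2]
print(rt.row_splits)      # [0 3 3 5 6]
print(rt.row_lengths())   # [3 0 2 1]
print(rt.value_rowids())  # [0 0 0 2 2 3]
```
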
    diff --git a/site/en/r1/guide/saved_model.md b/site/en/r1/guide/saved_model.md
    index 623863a9df9..34447ffe861 100644
    --- a/site/en/r1/guide/saved_model.md
    +++ b/site/en/r1/guide/saved_model.md
    @@ -23,7 +23,7 @@ TensorFlow saves variables in binary *checkpoint files* that map variable
     names to tensor values.
     
     Caution: TensorFlow model files are code. Be careful with untrusted code.
    -See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md)
    +See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/r1.15/SECURITY.md)
     for details.
     
     ### Save variables
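
A minimal sketch of the checkpoint workflow described here; the variable shapes and checkpoint path are placeholders:

```python
import tensorflow.compat.v1 as tf

v1 = tf.get_variable('v1', shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable('v2', shape=[5], initializer=tf.ones_initializer)
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    save_path = saver.save(sess, '/tmp/model.ckpt')
    print('Variables saved to:', save_path)
```
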
    @@ -148,7 +148,7 @@ Notes:
        `tf.variables_initializer` for more information.
     
     *  To inspect the variables in a checkpoint, you can use the
    -   [`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py)
    +   [`inspect_checkpoint`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/inspect_checkpoint.py)
        library, particularly the `print_tensors_in_checkpoint_file` function.
     
     *  By default, `Saver` uses the value of the `tf.Variable.name` property
    @@ -159,7 +159,7 @@ Notes:
     ### Inspect variables in a checkpoint
     
     We can quickly inspect variables in a checkpoint with the
    -[`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) library.
    +[`inspect_checkpoint`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/inspect_checkpoint.py) library.
     
     Continuing from the save/restore examples shown earlier:
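
Assuming a checkpoint was written to `/tmp/model.ckpt` as in the sketch above, a quick inspection looks roughly like this:

```python
from tensorflow.python.tools import inspect_checkpoint as chkp

# Print every tensor stored in the checkpoint.
chkp.print_tensors_in_checkpoint_file('/tmp/model.ckpt',
                                      tensor_name='', all_tensors=True)
```
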
     
    @@ -216,7 +216,7 @@ simple_save(session,
     
     This configures the `SavedModel` so it can be loaded by
     [TensorFlow serving](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple) and supports the
    -[Predict API](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto).
    +[Predict API](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/predict.proto).
     To access the classify, regress, or multi-inference APIs, use the manual
 `SavedModel` builder APIs or a `tf.estimator.Estimator`.
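
A hedged sketch of `simple_save` with placeholder input and output tensors:

```python
import tensorflow.compat.v1 as tf

x = tf.placeholder(tf.float32, shape=[None, 3], name='x')
y = tf.layers.dense(x, 1, name='y')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    tf.saved_model.simple_save(sess, '/tmp/saved_model_dir',
                               inputs={'x': x}, outputs={'y': y})
```
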
     
    @@ -328,7 +328,7 @@ with tf.Session(graph=tf.Graph()) as sess:
     ### Load a SavedModel in C++
     
     The C++ version of the SavedModel
    -[loader](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/loader.h)
    +[loader](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/loader.h)
     provides an API to load a SavedModel from a path, while allowing
     `SessionOptions` and `RunOptions`.
     You have to specify the tags associated with the graph to be loaded.
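
For comparison, the Python counterpart (not shown in this hunk) loads a SavedModel along these lines; the export directory is a placeholder:

```python
import tensorflow.compat.v1 as tf

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], '/tmp/saved_model_dir')
    # The graph and variables from the SavedModel are now available in `sess`.
```
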
    @@ -383,20 +383,20 @@ reuse and share across tools consistently.
     You may use sets of tags to uniquely identify a `MetaGraphDef` saved in a
     SavedModel. A subset of commonly used tags is specified in:
     
    -* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/tag_constants.py)
    -* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/tag_constants.h)
    +* [Python](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/tag_constants.py)
    +* [C++](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/tag_constants.h)
     
     
     #### Standard SignatureDef constants
     
    -A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto)
    +A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/meta_graph.proto)
     is a protocol buffer that defines the signature of a computation
     supported by a graph.
     Commonly used input keys, output keys, and method names are
     defined in:
     
    -* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_constants.py)
    -* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h)
    +* [Python](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/signature_constants.py)
    +* [C++](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/signature_constants.h)
     
     ## Using SavedModel with Estimators
     
    @@ -408,7 +408,7 @@ To prepare a trained Estimator for serving, you must export it in the standard
     SavedModel format. This section explains how to:
     
     * Specify the output nodes and the corresponding
    -  [APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto)
    +  [APIs](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto)
       that can be served (Classify, Regress, or Predict).
     * Export your model to the SavedModel format.
     * Serve the model from a local server and request predictions.
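
These steps boil down to code along the following lines; `estimator` is assumed to be an already trained `tf.estimator.Estimator`, and the feature spec is illustrative:

```python
import tensorflow.compat.v1 as tf

feature_spec = {'x': tf.FixedLenFeature(shape=[3], dtype=tf.float32)}

def serving_input_receiver_fn():
    # Receives serialized tf.Example protos and parses them into features.
    serialized = tf.placeholder(dtype=tf.string, shape=[None],
                                name='input_example_tensor')
    features = tf.parse_example(serialized, feature_spec)
    return tf.estimator.export.ServingInputReceiver(
        features, {'examples': serialized})

# `estimator` is assumed to exist and to be trained already.
export_dir = estimator.export_savedmodel('/tmp/exports',
                                         serving_input_receiver_fn)
```
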
    @@ -506,7 +506,7 @@ Each `output` value must be an `ExportOutput` object  such as
     `tf.estimator.export.PredictOutput`.
     
     These output types map straightforwardly to the
    -[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto),
    +[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto),
     and so determine which request types will be honored.
     
     Note: In the multi-headed case, a `SignatureDef` will be generated for each
    @@ -515,7 +515,7 @@ the same keys.  These `SignatureDef`s differ only in their outputs, as
     provided by the corresponding `ExportOutput` entry.  The inputs are always
     those provided by the `serving_input_receiver_fn`.
     An inference request may specify the head by name.  One head must be named
    -using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://www.tensorflow.org/code/tensorflow/python/saved_model/signature_constants.py)
    +using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/signature_constants.py)
     indicating which `SignatureDef` will be served when an inference request
     does not specify one.
     
    @@ -566,9 +566,9 @@ Now you have a server listening for inference requests via gRPC on port 9000!
     ### Request predictions from a local server
     
     The server responds to gRPC requests according to the
    -[PredictionService](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto#L15)
    +[PredictionService](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto#L15)
     gRPC API service definition.  (The nested protocol buffers are defined in
    -various [neighboring files](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis)).
    +various [neighboring files](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis)).
     
     From the API service definition, the gRPC framework generates client libraries
     in various languages providing remote access to the API.  In a project using the
    @@ -620,7 +620,7 @@ The returned result in this example is a `ClassificationResponse` protocol
     buffer.
     
 This is a skeletal example; please see the [TensorFlow Serving](../deploy/index.md)
    -documentation and [examples](https://github.com/tensorflow/serving/tree/master/tensorflow_serving/example)
    +documentation and [examples](https://github.com/tensorflow/serving/tree/r1.15/tensorflow_serving/example)
     for more details.
     
     > Note: `ClassificationRequest` and `RegressionRequest` contain a
    diff --git a/site/en/r1/guide/using_tpu.md b/site/en/r1/guide/using_tpu.md
    index 74169092189..e3e338adf49 100644
    --- a/site/en/r1/guide/using_tpu.md
    +++ b/site/en/r1/guide/using_tpu.md
    @@ -7,8 +7,8 @@ changing the *hardware accelerator* in your notebook settings:
     TPU-enabled Colab notebooks are available to test:
     
       1. [A quick test, just to measure FLOPS](https://colab.research.google.com/notebooks/tpu.ipynb).
    -  2. [A CNN image classifier with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/fashion_mnist.ipynb).
    -  3. [An LSTM markov chain text generator with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/shakespeare_with_tpu_and_keras.ipynb)
    +  2. [A CNN image classifier with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/r1.15/tools/colab/fashion_mnist.ipynb).
+  3. [An LSTM Markov chain text generator with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/r1.15/tools/colab/shakespeare_with_tpu_and_keras.ipynb).
     
     ## TPUEstimator
     
    @@ -25,7 +25,7 @@ Cloud TPU is to define the model's inference phase (from inputs to predictions)
     outside of the `model_fn`. Then maintain separate implementations of the
     `Estimator` setup and `model_fn`, both wrapping this inference step. For an
     example of this pattern compare the `mnist.py` and `mnist_tpu.py` implementation in
    -[tensorflow/models](https://github.com/tensorflow/models/tree/master/official/r1/mnist).
    +[tensorflow/models](https://github.com/tensorflow/models/tree/r1.15/official/r1/mnist).
     
     ### Run a TPUEstimator locally
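
A rough sketch of running a `TPUEstimator` locally with `use_tpu=False`; `model_fn` and `train_input_fn` are assumed to be defined elsewhere, and the batch sizes are arbitrary:

```python
import tensorflow.compat.v1 as tf

run_config = tf.estimator.tpu.RunConfig()  # no TPU settings needed for local runs

estimator = tf.estimator.tpu.TPUEstimator(
    model_fn=model_fn,        # assumed: a standard Estimator model_fn
    config=run_config,
    use_tpu=False,            # fall back to CPU/GPU while iterating locally
    train_batch_size=64,
    eval_batch_size=64)

estimator.train(input_fn=train_input_fn, max_steps=1000)  # assumed input_fn
```
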
     
    @@ -350,10 +350,10 @@ in bytes. A minimum of a few MB (`buffer_size=8*1024*1024`) is recommended so
     that data is available when needed.
     
     The TPU-demos repo includes
    -[a script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
+[a script](https://github.com/tensorflow/tpu/blob/r1.15/tools/datasets/imagenet_to_gcs.py)
     for downloading the imagenet dataset and converting it to an appropriate format.
     This together with the imagenet
    -[models](https://github.com/tensorflow/tpu/tree/master/models)
    +[models](https://github.com/tensorflow/tpu/tree/r1.15/models)
     included in the repo demonstrate all of these best-practices.
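
A sketch of an input pipeline that follows these recommendations; the GCS path and pipeline parameters are placeholders:

```python
import tensorflow.compat.v1 as tf

files = tf.data.Dataset.list_files('gs://my-bucket/train-*.tfrecord')
dataset = files.interleave(
    lambda f: tf.data.TFRecordDataset(f, buffer_size=8 * 1024 * 1024),
    cycle_length=4)
dataset = dataset.shuffle(2048).repeat()
dataset = dataset.batch(128, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
```
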
     
     ## Next steps
    diff --git a/site/en/r1/guide/version_compat.md b/site/en/r1/guide/version_compat.md
    index 6702f6e0819..a765620518d 100644
    --- a/site/en/r1/guide/version_compat.md
    +++ b/site/en/r1/guide/version_compat.md
    @@ -49,19 +49,19 @@ patch versions.  The public APIs consist of
       submodules, but is not documented, then it is **not** considered part of the
       public API.
     
    -* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h).
    +* The [C API](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/c/c_api.h).
     
     * The following protocol buffer files:
    -    * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto)
    -    * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto)
    -    * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto)
    -    * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto)
    -    * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto)
    -    * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto)
    -    * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto)
    -    * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto)
    -    * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto)
    -    * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto)
    +    * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/attr_value.proto)
    +    * [`config`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/config.proto)
    +    * [`event`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/util/event.proto)
    +    * [`graph`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/graph.proto)
    +    * [`op_def`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def.proto)
    +    * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/reader_base.proto)
    +    * [`summary`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/summary.proto)
    +    * [`tensor`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto)
    +    * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.proto)
    +    * [`types`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.proto)
     
     
     ## What is *not* covered
    @@ -79,7 +79,7 @@ backward incompatible ways between minor releases. These include:
         such as:
     
       - [C++](./extend/cc.md) (exposed through header files in
    -    [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)).
    +    [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/cc)).
       - [Java](../api_docs/java/reference/org/tensorflow/package-summary),
       - [Go](https://pkg.go.dev/github.com/tensorflow/tensorflow/tensorflow/go)
       - [JavaScript](https://js.tensorflow.org)
    @@ -209,7 +209,7 @@ guidelines for evolving `GraphDef` versions.
     There are different data versions for graphs and checkpoints. The two data
     formats evolve at different rates from each other and also at different rates
     from TensorFlow. Both versioning systems are defined in
    -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h).
    +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/public/version.h).
     Whenever a new version is added, a note is added to the header detailing what
     changed and the date.
     
    @@ -224,7 +224,7 @@ We distinguish between the following kinds of data version information:
       (`min_producer`).
     
     Each piece of versioned data has a [`VersionDef
    -versions`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/versions.proto)
    +versions`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/versions.proto)
     field which records the `producer` that made the data, the `min_consumer`
     that it is compatible with, and a list of `bad_consumers` versions that are
     disallowed.
    @@ -239,7 +239,7 @@ accept a piece of data if the following are all true:
     *   `consumer` not in data's `bad_consumers`
     
     Since both producers and consumers come from the same TensorFlow code base,
    -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h)
    +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/public/version.h)
     contains a main data version which is treated as either `producer` or
     `consumer` depending on context and both `min_consumer` and `min_producer`
     (needed by producers and consumers, respectively). Specifically,
    @@ -309,7 +309,7 @@ existing producer scripts will not suddenly use the new functionality.
     1.  Add a new similar op named `SomethingV2` or similar and go through the
         process of adding it and switching existing Python wrappers to use it.
         To ensure forward compatibility use the checks suggested in
    -    [compat.py](https://www.tensorflow.org/code/tensorflow/python/compat/compat.py)
    +    [compat.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/compat/compat.py)
         when changing the Python wrappers.
     2.  Remove the old op (Can only take place with a major version change due to
         backward compatibility).
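
The forward-compatibility check mentioned in step 1 typically looks something like the sketch below; the ops and cutoff date are purely illustrative stand-ins:

```python
import tensorflow.compat.v1 as tf

def something(x):
    # Emit the new kernel only after the forward-compatibility window has
    # passed; both branches are stand-ins for `SomethingV2` / `Something`.
    if tf.compat.forward_compatible(2019, 8, 1):
        return tf.multiply(x, 2.0)
    return tf.add(x, x)
```
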
    diff --git a/site/en/r1/tutorials/README.md b/site/en/r1/tutorials/README.md
    index b6d932041bd..9ff164ad77c 100644
    --- a/site/en/r1/tutorials/README.md
    +++ b/site/en/r1/tutorials/README.md
    @@ -10,7 +10,7 @@ desktop, mobile, web, and cloud. See the sections below to get started.
     
     The high-level Keras API provides building blocks to create and
     train deep learning models. Start with these beginner-friendly
    -notebook examples, then read the [TensorFlow Keras guide](../guide/keras.ipynb).
    +notebook examples, then read the [TensorFlow Keras guide](https://www.tensorflow.org/guide/keras).
     
     * [Basic classification](./keras/basic_classification.ipynb)
     * [Text classification](./keras/basic_text_classification.ipynb)
    @@ -68,4 +68,4 @@ implement common ML algorithms. See the
     * [Boosted trees](./estimators/boosted_trees.ipynb)
     * [Gradient Boosted Trees: Model understanding](./estimators/boosted_trees_model_understanding.ipynb)
     * [Build a Convolutional Neural Network using Estimators](./estimators/cnn.ipynb)
    -* [Wide and deep learning with Estimators](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +* [Wide and deep learning with Estimators](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
    diff --git a/site/en/r1/tutorials/_index.ipynb b/site/en/r1/tutorials/_index.ipynb
    index e2fe960d125..eca1450964f 100644
    --- a/site/en/r1/tutorials/_index.ipynb
    +++ b/site/en/r1/tutorials/_index.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/distribute/keras.ipynb b/site/en/r1/tutorials/distribute/keras.ipynb
    index b8d3c87bfab..14e8bf739a9 100644
    --- a/site/en/r1/tutorials/distribute/keras.ipynb
    +++ b/site/en/r1/tutorials/distribute/keras.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -86,7 +86,7 @@
             "Essentially, it copies all of the model's variables to each processor.\n",
             "Then, it uses [all-reduce](http://mpitutorial.com/tutorials/mpi-reduce-and-allreduce/) to combine the gradients from all processors and applies the combined value to all copies of the model.\n",
             "\n",
    -        "`MirroredStategy` is one of several distribution strategy available in TensorFlow core. You can read about more strategies at [distribution strategy guide](../../guide/distribute_strategy.ipynb).\n"
+        "`MirroredStrategy` is one of several distribution strategies available in TensorFlow core. You can read about more strategies in the [distribution strategy guide](../../guide/distribute_strategy.ipynb).\n"
           ]
         },
         {
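
A minimal sketch of the pattern this notebook uses; the model architecture, loss, and optimizer are placeholders:

```python
import tensorflow.compat.v1 as tf

strategy = tf.distribute.MirroredStrategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

# Build and compile the model inside the strategy scope so its variables
# are mirrored across the available devices.
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
        tf.keras.layers.Dense(1)
    ])
    model.compile(loss='mse', optimizer='adam')
```
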
    @@ -345,7 +345,7 @@
           "source": [
             "The callbacks used here are:\n",
             "\n",
    -        "*   *Tensorboard*: This callback writes a log for Tensorboard which allows you to visualize the graphs.\n",
    +        "*   *TensorBoard*: This callback writes a log for TensorBoard which allows you to visualize the graphs.\n",
             "*   *Model Checkpoint*: This callback saves the model after every epoch.\n",
             "*   *Learning Rate Scheduler*: Using this callback, you can schedule the learning rate to change after every epoch/batch.\n",
             "\n",
    @@ -554,7 +554,7 @@
           },
           "outputs": [],
           "source": [
    -        "tf.keras.experimental.export_saved_model(model, path)"
    +        "model.save(path)"
           ]
         },
         {
    @@ -574,7 +574,7 @@
           },
           "outputs": [],
           "source": [
    -        "unreplicated_model = tf.keras.experimental.load_from_saved_model(path)\n",
    +        "unreplicated_model = tf.keras.models.load_model(path)\n",
             "\n",
             "unreplicated_model.compile(\n",
             "    loss='sparse_categorical_crossentropy',\n",
    diff --git a/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb b/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    index 6d09d2623de..c61f893ca4c 100644
    --- a/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    +++ b/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/distribute/training_loops.ipynb b/site/en/r1/tutorials/distribute/training_loops.ipynb
    index 1343e8c8b6b..8eb72c13030 100644
    --- a/site/en/r1/tutorials/distribute/training_loops.ipynb
    +++ b/site/en/r1/tutorials/distribute/training_loops.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/automatic_differentiation.ipynb b/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    index bbbb689a617..df843bac3b8 100644
    --- a/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    +++ b/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/custom_layers.ipynb b/site/en/r1/tutorials/eager/custom_layers.ipynb
    index c82458cb857..48b55ed943e 100644
    --- a/site/en/r1/tutorials/eager/custom_layers.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_layers.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -230,7 +230,7 @@
           "source": [
             "## Models: composing layers\n",
             "\n",
    -        "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut.\n",
    +        "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a ResNet is a composition of convolutions, batch normalizations, and a shortcut.\n",
             "\n",
             "The main class used when creating a layer-like thing which contains other layers is tf.keras.Model. Implementing one is done by inheriting from tf.keras.Model."
           ]
    diff --git a/site/en/r1/tutorials/eager/custom_training.ipynb b/site/en/r1/tutorials/eager/custom_training.ipynb
    index 72beefe89ad..f0f7faffa7f 100644
    --- a/site/en/r1/tutorials/eager/custom_training.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_training.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb b/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    index a4839429827..3989f3e44bc 100644
    --- a/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/eager_basics.ipynb b/site/en/r1/tutorials/eager/eager_basics.ipynb
    index 9a72f192385..acd00ec2e20 100644
    --- a/site/en/r1/tutorials/eager/eager_basics.ipynb
    +++ b/site/en/r1/tutorials/eager/eager_basics.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -236,7 +236,7 @@
             "x = tf.random.uniform([3, 3])\n",
             "\n",
             "print(\"Is there a GPU available: \"),\n",
    -        "print(tf.test.is_gpu_available())\n",
    +        "print(tf.config.list_physical_devices('GPU'))\n",
             "\n",
             "print(\"Is the Tensor on GPU #0:  \"),\n",
             "print(x.device.endswith('GPU:0'))"
    @@ -292,7 +292,7 @@
             "  time_matmul(x)\n",
             "\n",
             "# Force execution on GPU #0 if available\n",
    -        "if tf.test.is_gpu_available():\n",
    +        "if tf.config.list_physical_devices('GPU'):\n",
             "  with tf.device(\"GPU:0\"): # Or GPU:1 for the 2nd GPU, GPU:2 for the 3rd etc.\n",
             "    x = tf.random_uniform([1000, 1000])\n",
             "    assert x.device.endswith(\"GPU:0\")\n",
    diff --git a/site/en/r1/tutorials/estimators/boosted_trees.ipynb b/site/en/r1/tutorials/estimators/boosted_trees.ipynb
    deleted file mode 100644
    index 7452d521095..00000000000
    --- a/site/en/r1/tutorials/estimators/boosted_trees.ipynb
    +++ /dev/null
    @@ -1,606 +0,0 @@
    -{
    -  "cells": [
    -    {
    -      "cell_type": "markdown",
    -      "metadata": {
    -        "id": "7765UFHoyGx6"
    -      },
    -      "source": [
    -        "##### Copyright 2019 The TensorFlow Authors."
    -      ]
    -    },
    -    {
    -      "cell_type": "code",
    -      "execution_count": null,
    -      "metadata": {
    -        "cellView": "form",
    -        "id": "KVtTDrUNyL7x"
    -      },
    -      "outputs": [],
    -      "source": [
    -        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    -        "# you may not use this file except in compliance with the License.\n",
    -        "# You may obtain a copy of the License at\n",
    -        "#\n",
    -        "# https://www.apache.org/licenses/LICENSE-2.0\n",
    -        "#\n",
    -        "# Unless required by applicable law or agreed to in writing, software\n",
    -        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    -        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    -        "# See the License for the specific language governing permissions and\n",
    -        "# limitations under the License."
    -      ]
    -    },
    -    {
    -      "cell_type": "markdown",
    -      "metadata": {
    -        "id": "xPYxZMrWyA0N"
    -      },
    -      "source": [
    -        "#How to train Boosted Trees models in TensorFlow"
    -      ]
    -    },
    -    {
    -      "cell_type": "markdown",
    -      "metadata": {
    -        "id": "p_vOREjRx-Y0"
    -      },
    -      "source": [
    -        "
    \n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6lCDyX3HFWos" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "This tutorial is an end-to-end walkthrough of training a Gradient Boosting model using decision trees with the `tf.estimator` API. Boosted Trees models are among the most popular and effective machine learning approaches for both regression and classification. It is an ensemble technique that combines the predictions from several (think 10s, 100s or even 1000s) tree models.\n", - "\n", - "Boosted Trees models are popular with many machine learning practioners as they can achieve impressive performance with minimal hyperparameter tuning." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "from matplotlib import pyplot as plt\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import tensorflow.compat.v1 as tf\n", - "\n", - "tf.logging.set_verbosity(tf.logging.ERROR)\n", - "tf.set_random_seed(123)\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "The dataset consists of a training set and an evaluation set:\n", - "\n", - "* `dftrain` and `y_train` are the *training set*—the data the model uses to learn.\n", - "* The model is tested against the *eval set*, `dfeval`, and `y_eval`.\n", - "\n", - "For training you will use the following features:\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    Feature NameDescription
    sexGender of passenger
    ageAge of passenger
    n_siblings_spouses# siblings and partners aboard
    parch# of parents and children aboard
    fareFare passenger paid.
    classPassenger's class on ship
    deckWhich deck passenger was on
    embark_townWhich town passenger embarked from
    aloneIf passenger was alone
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AoPiWsJALr-k" - }, - "source": [ - "## Explore the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "slcat1yzmzw5" - }, - "source": [ - "Let's first preview some of the data and create summary statistics on the training set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "15PLelXBlxEW" - }, - "outputs": [], - "source": [ - "dftrain.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "j2hiM4ETmqP0" - }, - "outputs": [], - "source": [ - "dftrain.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-IR0e8V-LyJ4" - }, - "source": [ - "There are 627 and 264 examples in the training and evaluation sets, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_1NwYqGwDjFf" - }, - "outputs": [], - "source": [ - "dftrain.shape[0], dfeval.shape[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "28UFJ4KSMK3V" - }, - "source": [ - "The majority of passengers are in their 20's and 30's." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CaVDmZtuDfux" - }, - "outputs": [], - "source": [ - "dftrain.age.hist(bins=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1pifWiCoMbR5" - }, - "source": [ - "There are approximately twice as male passengers as female passengers aboard." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-WazAq30MO5J" - }, - "outputs": [], - "source": [ - "dftrain.sex.value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7_XkxrpmmVU_" - }, - "source": [ - "The majority of passengers were in the \"third\" class." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zZ3PvVy4l4gI" - }, - "outputs": [], - "source": [ - "(dftrain['class']\n", - " .value_counts()\n", - " .plot(kind='barh'))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HM5SlwlxmZMT" - }, - "source": [ - "Most passengers embarked from Southampton." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RVTSrdr4mZaC" - }, - "outputs": [], - "source": [ - "(dftrain['embark_town']\n", - " .value_counts()\n", - " .plot(kind='barh'))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aTn1niLPob3x" - }, - "source": [ - "Females have a much higher chance of surviving vs. males. This will clearly be a predictive feature for the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eh3KW5oYkaNS" - }, - "outputs": [], - "source": [ - "ax = (pd.concat([dftrain, y_train], axis=1)\\\n", - " .groupby('sex')\n", - " .survived\n", - " .mean()\n", - " .plot(kind='barh'))\n", - "ax.set_xlabel('% survive')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns and input functions\n", - "The Gradient Boosting estimator can utilize both numeric and categorical features. Feature columns work with all TensorFlow estimators and their purpose is to define the features used for modeling. Additionally they provide some feature engineering capabilities like one-hot-encoding, normalization, and bucketization. 
In this tutorial, the fields in `CATEGORICAL_COLUMNS` are transformed from categorical columns to one-hot-encoded columns ([indicator column](https://www.tensorflow.org/api_docs/python/tf/feature_column/indicator_column)):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "fc = tf.feature_column\n", - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return fc.indicator_column(\n", - " fc.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(fc.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "74GNtFpStSAz" - }, - "source": [ - "You can view the transformation that a feature column produces. For example, here is the output when using the `indicator_column` on a single example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eaq79D9FtmF8" - }, - "outputs": [], - "source": [ - "example = dftrain.head(1)\n", - "class_fc = one_hot_cat_column('class', ('First', 'Second', 'Third'))\n", - "print('Feature value: \"{}\"'.format(example['class'].iloc[0]))\n", - "print('One-hot encoded: ', fc.input_layer(dict(example), [class_fc]).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YbCUn3nCusC3" - }, - "source": [ - "Additionally, you can view all of the feature column transformations together:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "omIYcsVws3g0" - }, - "outputs": [], - "source": [ - "fc.input_layer(dict(example), feature_columns).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Next you need to create the input functions. These will specify how data will be read into our model for both training and inference. You will use the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas. This is suitable for smaller, in-memory datasets. For larger datasets, the tf.data API supports a variety of file formats (including [csv](https://www.tensorflow.org/api_docs/python/tf/data/experimental/make_csv_dataset)) so that you can process datasets that do not fit in memory." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = dataset.repeat(n_epochs)\n", - " # In memory training doesn't use batching.\n", - " dataset = dataset.batch(NUM_EXAMPLES)\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "## Train and evaluate the model\n", - "\n", - "Below you will do the following steps:\n", - "\n", - "1. Initialize the model, specifying the features and hyperparameters.\n", - "2. Feed the training data to the model using the `train_input_fn` and train the model using the `train` function.\n", - "3. You will assess model performance using the evaluation set—in this example, the `dfeval` DataFrame. You will verify that the predictions match the labels from the `y_eval` array.\n", - "\n", - "Before training a Boosted Trees model, let's first train a linear classifier (logistic regression model). It is best practice to start with simpler model to establish a benchmark." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JPOGpmmq3rJr" - }, - "outputs": [], - "source": [ - "linear_est = tf.estimator.LinearClassifier(feature_columns)\n", - "\n", - "# Train model.\n", - "linear_est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Evaluation.\n", - "results = linear_est.evaluate(eval_input_fn)\n", - "print('Accuracy : ', results['accuracy'])\n", - "print('Dummy model: ', results['accuracy_baseline'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BarkNXwA3rJu" - }, - "source": [ - "Next let's train a Boosted Trees model. For boosted trees, regression (`BoostedTreesRegressor`) and classification (`BoostedTreesClassifier`) are supported, along with using any twice differentiable custom loss (`BoostedTreesEstimator`). Since the goal is to predict a class - survive or not survive, you will use the `BoostedTreesClassifier`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "# Since data fits into memory, use entire dataset per layer. 
It will be faster.\n", - "# Above one batch is defined as the entire dataset.\n", - "n_batches = 1\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns,\n", - " n_batches_per_layer=n_batches)\n", - "\n", - "# The model will stop training once the specified number of trees is built, not\n", - "# based on the number of steps.\n", - "est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Eval.\n", - "results = est.evaluate(eval_input_fn)\n", - "print('Accuracy : ', results['accuracy'])\n", - "print('Dummy model: ', results['accuracy_baseline'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hEflwznXvuMP" - }, - "source": [ - "Now you can use the train model to make predictions on a passenger from the evaluation set. TensorFlow models are optimized to make predictions on a batch, or collection, of examples at once. Earlier, the `eval_input_fn` is defined using the entire evaluation set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6zmIjTr73rJ4" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.predict(eval_input_fn))\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "\n", - "probs.plot(kind='hist', bins=20, title='predicted probabilities')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mBUaNN1BzJHG" - }, - "source": [ - "Finally you can also look at the receiver operating characteristic (ROC) of the results, which will give us a better idea of the tradeoff between the true positive rate and false positive rate." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NzxghvVz3rJ6" - }, - "outputs": [], - "source": [ - "from sklearn.metrics import roc_curve\n", - "\n", - "fpr, tpr, _ = roc_curve(y_eval, probs)\n", - "plt.plot(fpr, tpr)\n", - "plt.title('ROC curve')\n", - "plt.xlabel('false positive rate')\n", - "plt.ylabel('true positive rate')\n", - "plt.xlim(0,)\n", - "plt.ylim(0,)\n", - "plt.show()" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "boosted_trees.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/estimators/boosted_trees_model_understanding.ipynb b/site/en/r1/tutorials/estimators/boosted_trees_model_understanding.ipynb deleted file mode 100644 index 6f3f2c2feb0..00000000000 --- a/site/en/r1/tutorials/estimators/boosted_trees_model_understanding.ipynb +++ /dev/null @@ -1,1028 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7765UFHoyGx6" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "KVtTDrUNyL7x" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r0_fqL3ayLHX" - }, - "source": [ - "# Gradient Boosted Trees: Model understanding" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PS6_yKSoyLAl" - }, - "source": [ - "\n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PS6_yKSoyLAl" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "For an end-to-end walkthrough of training a Gradient Boosting model check out the [boosted trees tutorial](https://www.tensorflow.org/r1/tutorials/estimators/boosted_trees). In this tutorial you will:\n", - "\n", - "* Learn how to interpret a Boosted Trees model both *locally* and *globally*\n", - "* Gain intution for how a Boosted Trees model fits a dataset\n", - "\n", - "## How to interpret Boosted Trees models both locally and globally\n", - "\n", - "Local interpretability refers to an understanding of a model’s predictions at the individual example level, while global interpretability refers to an understanding of the model as a whole. Such techniques can help machine learning (ML) practitioners detect bias and bugs during the model development stage\n", - "\n", - "For local interpretability, you will learn how to create and visualize per-instance contributions. To distinguish this from feature importances, we refer to these values as directional feature contributions (DFCs).\n", - "\n", - "For global interpretability you will retrieve and visualize gain-based feature importances, [permutation feature importances](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf) and also show aggregated DFCs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import tensorflow.compat.v1 as tf\n", - "\n", - "\n", - "tf.logging.set_verbosity(tf.logging.ERROR)\n", - "tf.set_random_seed(123)\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tfbt/titanic_train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tfbt/titanic_eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "For a description of the features, please review the prior tutorial." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns, input_fn, and the train the estimator" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JiJ6K3hr1lXW" - }, - "source": [ - "### Preprocess the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "udMytRJC05oW" - }, - "source": [ - "Create the feature columns, using the original numeric columns as is and one-hot-encoding categorical variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "fc = tf.feature_column\n", - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return fc.indicator_column(\n", - " fc.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(fc.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9rTefnXe1n0v" - }, - "source": [ - "### Build the input pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Create the input functions using the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((X.to_dict(orient='list'), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = (dataset\n", - " .repeat(n_epochs)\n", - " .batch(NUM_EXAMPLES))\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "### Train the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "params = {\n", - " 'n_trees': 50,\n", - " 'max_depth': 3,\n", - " 'n_batches_per_layer': 1,\n", - " # You must enable center_bias = True to get DFCs. This will force the model to\n", - " # make an initial prediction before using any features (e.g. use the mean of\n", - " # the training labels for regression or log odds for classification when\n", - " # using cross entropy loss).\n", - " 'center_bias': True\n", - "}\n", - "\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns, **params)\n", - "est.train(train_input_fn, max_steps=100)\n", - "results = est.evaluate(eval_input_fn)\n", - "pd.Series(results).to_frame()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cUrakbu6sqKe" - }, - "source": [ - "For performance reasons, when your data fits in memory, we recommend use the `boosted_trees_classifier_train_in_memory` function. 
However if training time is not of a concern or if you have a very large dataset and want to do distributed training, use the `tf.estimator.BoostedTrees` API shown above.\n", - "\n", - "\n", - "When using this method, you should not batch your input data, as the method operates on the entire dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-4_xz3b_D0W5" - }, - "outputs": [], - "source": [ - "in_memory_params = dict(params)\n", - "del in_memory_params['n_batches_per_layer']\n", - "# In-memory input_fn does not use batching.\n", - "def make_inmemory_train_input_fn(X, y):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " return dict(X), y\n", - " return input_fn\n", - "train_input_fn = make_inmemory_train_input_fn(dftrain, y_train)\n", - "\n", - "# Train the model.\n", - "est = tf.contrib.estimator.boosted_trees_classifier_train_in_memory(\n", - " train_input_fn,\n", - " feature_columns,\n", - " **in_memory_params)\n", - "print(est.evaluate(eval_input_fn))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TSZYqNcRuczV" - }, - "source": [ - "## Model interpretation and plotting" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BjcfLiI3uczW" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "sns_colors = sns.color_palette('colorblind')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ywTtbBvBuczY" - }, - "source": [ - "## Local interpretability\n", - "Next you will output the directional feature contributions (DFCs) to explain individual predictions using the approach outlined in [Palczewska et al](https://arxiv.org/pdf/1312.1121.pdf) and by Saabas in [Interpreting Random Forests](http://blog.datadive.net/interpreting-random-forests/) (this method is also available in scikit-learn for Random Forests in the [`treeinterpreter`](https://github.com/andosa/treeinterpreter) package). The DFCs are generated with:\n", - "\n", - "`pred_dicts = list(est.experimental_predict_with_explanations(pred_input_fn))`\n", - "\n", - "(Note: The method is named experimental as we may modify the API before dropping the experimental prefix.)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TIL93B4sDRqE" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tDPoRx_ZaY1E" - }, - "outputs": [], - "source": [ - "# Create DFC Pandas dataframe.\n", - "labels = y_eval.values\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "df_dfc = pd.DataFrame([pred['dfc'] for pred in pred_dicts])\n", - "df_dfc.describe().T" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EUKSaVoraY1C" - }, - "source": [ - "A nice property of DFCs is that the sum of the contributions + the bias is equal to the prediction for a given example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hd9VuizRaY1H" - }, - "outputs": [], - "source": [ - "# Sum of DFCs + bias == probabality.\n", - "bias = pred_dicts[0]['bias']\n", - "dfc_prob = df_dfc.sum(axis=1) + bias\n", - "np.testing.assert_almost_equal(dfc_prob.values,\n", - " probs.values)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uIC7qm1gaY1L" - }, - "source": [ - "Plot DFCs for an individual passenger." 
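Before the bar chart that follows, a quick tabular view can make the same information easier to scan. This is a small added sketch, not part of the original notebook; the index 182 simply matches the passenger plotted below:

```
# Show the eight largest-magnitude directional feature contributions for one
# passenger from the evaluation set, sorted by absolute value.
ID = 182  # same example index used in the plot below
example = df_dfc.iloc[ID]
example.reindex(example.abs().sort_values(ascending=False).index).head(8)
```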
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P3u971LsuczZ" - }, - "outputs": [], - "source": [ - "# Plot results.\n", - "ID = 182\n", - "example = df_dfc.iloc[ID] # Choose ith example from evaluation set.\n", - "TOP_N = 8 # View top 8 features.\n", - "sorted_ix = example.abs().sort_values()[-TOP_N:].index\n", - "ax = example[sorted_ix].plot(kind='barh', color=sns_colors[3])\n", - "ax.grid(False, axis='y')\n", - "\n", - "ax.set_title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "ax.set_xlabel('Contribution to predicted probability')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "L4i4mjK66FYg" - }, - "source": [ - "The larger magnitude contributions have a larger impact on the model's prediction. Negative contributions indicate the feature value for this given example reduced the model's prediction, while positive values contribute an increase in the prediction." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tx5p4vEhuczg" - }, - "source": [ - "### Improved plotting\n", - "Let's make the plot nice by color coding based on the contributions' directionality and add the feature values on figure." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6z_Tq1Pquczj" - }, - "outputs": [], - "source": [ - "# Boilerplate code for plotting :)\n", - "def _get_color(value):\n", - " \"\"\"To make positive DFCs plot green, negative DFCs plot red.\"\"\"\n", - " green, red = sns.color_palette()[2:4]\n", - " if value >= 0: return green\n", - " return red\n", - "\n", - "def _add_feature_values(feature_values, ax):\n", - " \"\"\"Display feature's values on left of plot.\"\"\"\n", - " x_coord = ax.get_xlim()[0]\n", - " OFFSET = 0.15\n", - " for y_coord, (feat_name, feat_val) in enumerate(feature_values.items()):\n", - " t = plt.text(x_coord, y_coord - OFFSET, '{}'.format(feat_val), size=12)\n", - " t.set_bbox(dict(facecolor='white', alpha=0.5))\n", - " from matplotlib.font_manager import FontProperties\n", - " font = FontProperties()\n", - " font.set_weight('bold')\n", - " t = plt.text(x_coord, y_coord + 1 - OFFSET, 'feature\\nvalue',\n", - " fontproperties=font, size=12)\n", - "\n", - "def plot_example(example):\n", - " TOP_N = 8 # View top 8 features.\n", - " sorted_ix = example.abs().sort_values()[-TOP_N:].index # Sort by magnitude.\n", - " example = example[sorted_ix]\n", - " colors = example.map(_get_color).tolist()\n", - " ax = example.to_frame().plot(kind='barh',\n", - " color=[colors],\n", - " legend=None,\n", - " alpha=0.75,\n", - " figsize=(10,6))\n", - " ax.grid(False, axis='y')\n", - " ax.set_yticklabels(ax.get_yticklabels(), size=14)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - " return ax" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FlrsuOu8-Yds" - }, - "source": [ - "Plot example." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ht1P2-1euczk" - }, - "outputs": [], - "source": [ - "example = df_dfc.iloc[ID] # Choose IDth example from evaluation set.\n", - "ax = plot_example(example)\n", - "ax.set_title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "ax.set_xlabel('Contribution to predicted probability', size=14)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0swvlkZFaY1Z" - }, - "source": [ - "You can also plot the example's DFCs compare with the entire distribution using a voilin plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zo7rNd1v_5e2" - }, - "outputs": [], - "source": [ - "# Boilerplate plotting code.\n", - "def dist_violin_plot(df_dfc, ID):\n", - " # Initialize plot.\n", - " fig, ax = plt.subplots(1, 1, figsize=(10, 6))\n", - "\n", - " # Create example dataframe.\n", - " TOP_N = 8 # View top 8 features.\n", - " example = df_dfc.iloc[ID]\n", - " ix = example.abs().sort_values()[-TOP_N:].index\n", - " example = example[ix]\n", - " example_df = example.to_frame(name='dfc')\n", - "\n", - " # Add contributions of entire distribution.\n", - " parts=ax.violinplot([df_dfc[w] for w in ix],\n", - " vert=False,\n", - " showextrema=False,\n", - " widths=0.7,\n", - " positions=np.arange(len(ix)))\n", - " face_color = sns_colors[0]\n", - " alpha = 0.15\n", - " for pc in parts['bodies']:\n", - " pc.set_facecolor(face_color)\n", - " pc.set_alpha(alpha)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - "\n", - " # Add local contributions.\n", - " ax.scatter(example,\n", - " np.arange(example.shape[0]),\n", - " color=sns.color_palette()[2],\n", - " s=100,\n", - " marker=\"s\",\n", - " label='contributions for example')\n", - "\n", - " # Legend\n", - " # Proxy plot, to show violinplot dist on legend.\n", - " ax.plot([0,0], [1,1], label='eval set contributions\\ndistributions',\n", - " color=face_color, alpha=alpha, linewidth=10)\n", - " legend = ax.legend(loc='lower right', shadow=True, fontsize='x-large',\n", - " frameon=True)\n", - " legend.get_frame().set_facecolor('white')\n", - "\n", - " # Format plot.\n", - " ax.set_yticks(np.arange(example.shape[0]))\n", - " ax.set_yticklabels(example.index)\n", - " ax.grid(False, axis='y')\n", - " ax.set_xlabel('Contribution to predicted probability', size=14)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PiLw2tlm_9aK" - }, - "source": [ - "Plot this example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VkCqraA2uczm" - }, - "outputs": [], - "source": [ - "dist_violin_plot(df_dfc, ID)\n", - "plt.title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TVJFM85SAWVq" - }, - "source": [ - "Finally, third-party tools, such as [LIME](https://github.com/marcotcr/lime) and [shap](https://github.com/slundberg/shap), can also help understand individual predictions for a model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PnNXH6mZuczr" - }, - "source": [ - "## Global feature importances\n", - "\n", - "Additionally, you might want to understand the model as a whole, rather than studying individual predictions. 
Below, you will compute and use:\n", - "\n", - "* Gain-based feature importances using `est.experimental_feature_importances`\n", - "* Permutation importances\n", - "* Aggregate DFCs using `est.experimental_predict_with_explanations`\n", - "\n", - "Gain-based feature importances measure the loss change when splitting on a particular feature, while permutation feature importances are computed by evaluating model performance on the evaluation set by shuffling each feature one-by-one and attributing the change in model performance to the shuffled feature.\n", - "\n", - "In general, permutation feature importance are preferred to gain-based feature importance, though both methods can be unreliable in situations where potential predictor variables vary in their scale of measurement or their number of categories and when features are correlated ([source](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-307)). Check out [this article](http://explained.ai/rf-importance/index.html) for an in-depth overview and great discussion on different feature importance types." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ocBcMatuczs" - }, - "source": [ - "### Gain-based feature importances" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gMaxCgPbBJ-j" - }, - "source": [ - "Gain-based feature importances are built into the TensorFlow Boosted Trees estimators using `est.experimental_feature_importances`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pPTxbAaeuczt" - }, - "outputs": [], - "source": [ - "importances = est.experimental_feature_importances(normalize=True)\n", - "df_imp = pd.Series(importances)\n", - "\n", - "# Visualize importances.\n", - "N = 8\n", - "ax = (df_imp.iloc[0:N][::-1]\n", - " .plot(kind='barh',\n", - " color=sns_colors[0],\n", - " title='Gain feature importances',\n", - " figsize=(10, 6)))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GvfAcBeGuczw" - }, - "source": [ - "### Average absolute DFCs\n", - "You can also average the absolute values of DFCs to understand impact at a global level." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JkvAWLWLuczx" - }, - "outputs": [], - "source": [ - "# Plot.\n", - "dfc_mean = df_dfc.abs().mean()\n", - "N = 8\n", - "sorted_ix = dfc_mean.abs().sort_values()[-N:].index # Average and sort by absolute.\n", - "ax = dfc_mean[sorted_ix].plot(kind='barh',\n", - " color=sns_colors[1],\n", - " title='Mean |directional feature contributions|',\n", - " figsize=(10, 6))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z0k_DvPLaY1o" - }, - "source": [ - "You can also see how DFCs vary as a feature value varies." 
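The regression plot below shows this for the continuous `fare` feature. For a categorical feature you can get a similar picture by averaging the contributions per category; the following is a small added sketch (not in the original notebook) that assumes `sex` is one of the DFC columns:

```
# Average directional feature contribution of 'sex', grouped by feature value.
pd.Series(df_dfc['sex'].values, index=dfeval['sex'].values).groupby(level=0).mean()
```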
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZcIfN1IpaY1o" - }, - "outputs": [], - "source": [ - "FEATURE = 'fare'\n", - "feature = pd.Series(df_dfc[FEATURE].values, index=dfeval[FEATURE].values).sort_index()\n", - "ax = sns.regplot(feature.index.values, feature.values, lowess=True)\n", - "ax.set_ylabel('contribution')\n", - "ax.set_xlabel(FEATURE)\n", - "ax.set_xlim(0, 100)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lbpG72ULucz0" - }, - "source": [ - "### Permutation feature importance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6esOw1VOucz0" - }, - "outputs": [], - "source": [ - "def permutation_importances(est, X_eval, y_eval, metric, features):\n", - " \"\"\"Column by column, shuffle values and observe effect on eval set.\n", - "\n", - " source: http://explained.ai/rf-importance/index.html\n", - " A similar approach can be done during training. See \"Drop-column importance\"\n", - " in the above article.\"\"\"\n", - " baseline = metric(est, X_eval, y_eval)\n", - " imp = []\n", - " for col in features:\n", - " save = X_eval[col].copy()\n", - " X_eval[col] = np.random.permutation(X_eval[col])\n", - " m = metric(est, X_eval, y_eval)\n", - " X_eval[col] = save\n", - " imp.append(baseline - m)\n", - " return np.array(imp)\n", - "\n", - "def accuracy_metric(est, X, y):\n", - " \"\"\"TensorFlow estimator accuracy.\"\"\"\n", - " eval_input_fn = make_input_fn(X,\n", - " y=y,\n", - " shuffle=False,\n", - " n_epochs=1)\n", - " return est.evaluate(input_fn=eval_input_fn)['accuracy']\n", - "features = CATEGORICAL_COLUMNS + NUMERIC_COLUMNS\n", - "importances = permutation_importances(est, dfeval, y_eval, accuracy_metric,\n", - " features)\n", - "df_imp = pd.Series(importances, index=features)\n", - "\n", - "sorted_ix = df_imp.abs().sort_values().index\n", - "ax = df_imp[sorted_ix][-5:].plot(kind='barh', color=sns_colors[2], figsize=(10, 6))\n", - "ax.grid(False, axis='y')\n", - "ax.set_title('Permutation feature importance')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E236y3pVEzHg" - }, - "source": [ - "## Visualizing model fitting" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TrcQ-839EzZ6" - }, - "source": [ - "Lets first simulate/create training data using the following formula:\n", - "\n", - "\n", - "$$z=x* e^{-x^2 - y^2}$$\n", - "\n", - "\n", - "Where \\\\(z\\\\) is the dependent variable you are trying to predict and \\\\(x\\\\) and \\\\(y\\\\) are the features." 
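A note before the next cells: they import `griddata` from `matplotlib.mlab`, which was removed in Matplotlib 3.1 and later. If that import fails in your environment, a SciPy-based stand-in such as the sketch below could be used instead (an addition here, not part of the original notebook), and the failing import skipped:

```
try:
    from matplotlib.mlab import griddata  # removed in Matplotlib >= 3.1
except ImportError:
    from scipy.interpolate import griddata as _scipy_griddata

    def griddata(x, y, z, xi, yi, interp='linear'):
        """Stand-in for mlab.griddata: interpolate scattered (x, y, z) onto the grid (xi, yi).

        Points outside the data's convex hull are filled with 0 rather than masked.
        """
        return _scipy_griddata((x, y), z, (xi, yi), method=interp, fill_value=0.0)
```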
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e8woaj81GGE9" - }, - "outputs": [], - "source": [ - "from numpy.random import uniform, seed\n", - "from matplotlib.mlab import griddata\n", - "\n", - "# Create fake data\n", - "seed(0)\n", - "npts = 5000\n", - "x = uniform(-2, 2, npts)\n", - "y = uniform(-2, 2, npts)\n", - "z = x*np.exp(-x**2 - y**2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GRI3KHfLZsGP" - }, - "outputs": [], - "source": [ - "# Prep data for training.\n", - "df = pd.DataFrame({'x': x, 'y': y, 'z': z})\n", - "\n", - "xi = np.linspace(-2.0, 2.0, 200),\n", - "yi = np.linspace(-2.1, 2.1, 210),\n", - "xi,yi = np.meshgrid(xi, yi)\n", - "\n", - "df_predict = pd.DataFrame({\n", - " 'x' : xi.flatten(),\n", - " 'y' : yi.flatten(),\n", - "})\n", - "predict_shape = xi.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w0JnH4IhZuAb" - }, - "outputs": [], - "source": [ - "def plot_contour(x, y, z, **kwargs):\n", - " # Grid the data.\n", - " plt.figure(figsize=(10, 8))\n", - " # Contour the gridded data, plotting dots at the nonuniform data points.\n", - " CS = plt.contour(x, y, z, 15, linewidths=0.5, colors='k')\n", - " CS = plt.contourf(x, y, z, 15,\n", - " vmax=abs(zi).max(), vmin=-abs(zi).max(), cmap='RdBu_r')\n", - " plt.colorbar() # Draw colorbar.\n", - " # Plot data points.\n", - " plt.xlim(-2, 2)\n", - " plt.ylim(-2, 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KF7WsIcYGF_E" - }, - "source": [ - "You can visualize the function. Redder colors correspond to larger function values." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WrxuqaaXGFOK" - }, - "outputs": [], - "source": [ - "zi = griddata(x, y, z, xi, yi, interp='linear')\n", - "plot_contour(xi, yi, zi)\n", - "plt.scatter(df.x, df.y, marker='.')\n", - "plt.title('Contour on training data')\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hoANr0f2GFrM" - }, - "outputs": [], - "source": [ - "fc = [tf.feature_column.numeric_column('x'),\n", - " tf.feature_column.numeric_column('y')]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xVRWyoY3ayTK" - }, - "outputs": [], - "source": [ - "def predict(est):\n", - " \"\"\"Predictions from a given estimator.\"\"\"\n", - " predict_input_fn = lambda: tf.data.Dataset.from_tensors(dict(df_predict))\n", - " preds = np.array([p['predictions'][0] for p in est.predict(predict_input_fn)])\n", - " return preds.reshape(predict_shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uyPu5618GU7K" - }, - "source": [ - "First let's try to fit a linear model to the data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zUIV2IVgGVSk" - }, - "outputs": [], - "source": [ - "train_input_fn = make_input_fn(df, df.z)\n", - "est = tf.estimator.LinearRegressor(fc)\n", - "est.train(train_input_fn, max_steps=500);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_u4WAcCqfbco" - }, - "outputs": [], - "source": [ - "plot_contour(xi, yi, predict(est))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XD_fMAUtSCSa" - }, - "source": [ - "It's not a very good fit. Next let's try to fit a GBDT model to it and try to understand how the model fits the function." 
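Before moving on, an optional added check (not in the original notebook) puts a number on how far off the linear model is by comparing its predictions against the true surface defined by the formula above:

```
# Evaluate the linear model against the known generating function
# z = x * exp(-x**2 - y**2) over the prediction grid.
z_true = xi * np.exp(-xi**2 - yi**2)
print('Linear model MSE over the grid:', np.mean((predict(est) - z_true)**2))
```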
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ka1GgvqmSCK7" - }, - "outputs": [], - "source": [ - "def create_bt_est(n_trees):\n", - " return tf.estimator.BoostedTreesRegressor(fc,\n", - " n_batches_per_layer=1,\n", - " n_trees=n_trees)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w0s86Kq1R_Fc" - }, - "outputs": [], - "source": [ - "N_TREES = [1,2,3,4,10,20,50,100]\n", - "for n in N_TREES:\n", - " est = create_bt_est(n)\n", - " est.train(train_input_fn, max_steps=500)\n", - " plot_contour(xi, yi, predict(est))\n", - " plt.text(-1.8, 2.1, '# trees: {}'.format(n), color='w', backgroundcolor='black', size=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5WcZ9fubh1wT" - }, - "source": [ - "As you increase the number of trees, the model's predictions better approximates the underlying function." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SMKoEZnCdrsp" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZSZUSrjXdw9g" - }, - "source": [ - "In this tutorial you learned how to interpret Boosted Trees models using directional feature contributions and feature importance techniques. These techniques provide insight into how the features impact a model's predictions. Finally, you also gained intution for how a Boosted Tree model fits a complex function by viewing the decision surface for several models." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "boosted_trees_model_understanding.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/estimators/cnn.ipynb b/site/en/r1/tutorials/estimators/cnn.ipynb deleted file mode 100644 index 6ce033f2d30..00000000000 --- a/site/en/r1/tutorials/estimators/cnn.ipynb +++ /dev/null @@ -1,973 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Tce3stUlHN0L" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "tuOe1ymfHZPu" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfBg1C5NB3X0" - }, - "source": [ - "# Build a Convolutional Neural Network using Estimators\n", - "\n", - "\n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfBg1C5NB3X0" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xHxb-dlhMIzW" - }, - "source": [ - "The `tf.layers` module provides a high-level API that makes\n", - "it easy to construct a neural network. It provides methods that facilitate the\n", - "creation of dense (fully connected) layers and convolutional layers, adding\n", - "activation functions, and applying dropout regularization. In this tutorial,\n", - "you'll learn how to use `layers` to build a convolutional neural network model\n", - "to recognize the handwritten digits in the MNIST data set.\n", - "\n", - "![handwritten digits 0–9 from the MNIST data set](https://www.tensorflow.org/images/mnist_0-9.png)\n", - "\n", - "The [MNIST dataset](http://yann.lecun.com/exdb/mnist/) comprises 60,000\n", - "training examples and 10,000 test examples of the handwritten digits 0–9,\n", - "formatted as 28x28-pixel monochrome images." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wTe-6uXpP2Ts" - }, - "source": [ - "## Get Started\n", - "\n", - "Let's set up the imports for our TensorFlow program:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6-tpguHLP6Rm" - }, - "outputs": [], - "source": [ - "import tensorflow.compat.v1 as tf\n", - "\n", - "import numpy as np\n", - "\n", - "tf.logging.set_verbosity(tf.logging.INFO)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4j5yyyDFQgSB" - }, - "source": [ - "## Intro to Convolutional Neural Networks\n", - "\n", - "Convolutional neural networks (CNNs) are the current state-of-the-art model\n", - "architecture for image classification tasks. CNNs apply a series of filters to\n", - "the raw pixel data of an image to extract and learn higher-level features, which\n", - "the model can then use for classification. CNNs contains three components:\n", - "\n", - "* **Convolutional layers**, which apply a specified number of convolution\n", - " filters to the image. For each subregion, the layer performs a set of\n", - " mathematical operations to produce a single value in the output feature map.\n", - " Convolutional layers then typically apply a\n", - " [ReLU activation function](https://en.wikipedia.org/wiki/Rectifier_\\(neural_networks\\)) to\n", - " the output to introduce nonlinearities into the model.\n", - "\n", - "* **Pooling layers**, which\n", - " [downsample the image data](https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer)\n", - " extracted by the convolutional layers to reduce the dimensionality of the\n", - " feature map in order to decrease processing time. A commonly used pooling\n", - " algorithm is max pooling, which extracts subregions of the feature map\n", - " (e.g., 2x2-pixel tiles), keeps their maximum value, and discards all other\n", - " values.\n", - "\n", - "* **Dense (fully connected) layers**, which perform classification on the\n", - " features extracted by the convolutional layers and downsampled by the\n", - " pooling layers. 
In a dense layer, every node in the layer is connected to\n", - " every node in the preceding layer.\n", - "\n", - "Typically, a CNN is composed of a stack of convolutional modules that perform\n", - "feature extraction. Each module consists of a convolutional layer followed by a\n", - "pooling layer. The last convolutional module is followed by one or more dense\n", - "layers that perform classification. The final dense layer in a CNN contains a\n", - "single node for each target class in the model (all the possible classes the\n", - "model may predict), with a\n", - "[softmax](https://en.wikipedia.org/wiki/Softmax_function) activation function to\n", - "generate a value between 0–1 for each node (the sum of all these softmax values\n", - "is equal to 1). We can interpret the softmax values for a given image as\n", - "relative measurements of how likely it is that the image falls into each target\n", - "class.\n", - "\n", - "Note: For a more comprehensive walkthrough of CNN architecture, see Stanford University's [Convolutional Neural Networks for Visual Recognition course material](https://cs231n.github.io/convolutional-networks/)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j23E_Z0FQvZB" - }, - "source": [ - "## Building the CNN MNIST Classifier\n", - "\n", - "Let's build a model to classify the images in the MNIST dataset using the\n", - "following CNN architecture:\n", - "\n", - "1. **Convolutional Layer #1**: Applies 32 5x5 filters (extracting 5x5-pixel\n", - " subregions), with ReLU activation function\n", - "2. **Pooling Layer #1**: Performs max pooling with a 2x2 filter and stride of 2\n", - " (which specifies that pooled regions do not overlap)\n", - "3. **Convolutional Layer #2**: Applies 64 5x5 filters, with ReLU activation\n", - " function\n", - "4. **Pooling Layer #2**: Again, performs max pooling with a 2x2 filter and\n", - " stride of 2\n", - "5. **Dense Layer #1**: 1,024 neurons, with dropout regularization rate of 0.4\n", - " (probability of 0.4 that any given element will be dropped during training)\n", - "6. **Dense Layer #2 (Logits Layer)**: 10 neurons, one for each digit target\n", - " class (0–9).\n", - "\n", - "The `tf.layers` module contains methods to create each of the three layer types\n", - "above:\n", - "\n", - "* `conv2d()`. Constructs a two-dimensional convolutional layer. Takes number\n", - " of filters, filter kernel size, padding, and activation function as\n", - " arguments.\n", - "* `max_pooling2d()`. Constructs a two-dimensional pooling layer using the\n", - " max-pooling algorithm. Takes pooling filter size and stride as arguments.\n", - "* `dense()`. Constructs a dense layer. Takes number of neurons and activation\n", - " function as arguments.\n", - "\n", - "Each of these methods accepts a tensor as input and returns a transformed tensor\n", - "as output. This makes it easy to connect one layer to another: just take the\n", - "output from one layer-creation method and supply it as input to another.\n", - "\n", - "Add the following `cnn_model_fn` function, which\n", - "conforms to the interface expected by TensorFlow's Estimator API (more on this\n", - "later in [Create the Estimator](#create-the-estimator)). 
This function takes\n", - "MNIST feature data, labels, and mode (from\n", - "`tf.estimator.ModeKeys`: `TRAIN`, `EVAL`, `PREDICT`) as arguments;\n", - "configures the CNN; and returns predictions, loss, and a training operation:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gMR-_3rkRKPa" - }, - "outputs": [], - "source": [ - "def cnn_model_fn(features, labels, mode):\n", - " \"\"\"Model function for CNN.\"\"\"\n", - " # Input Layer\n", - " input_layer = tf.reshape(features[\"x\"], [-1, 28, 28, 1])\n", - "\n", - " # Convolutional Layer #1\n", - " conv1 = tf.layers.conv2d(\n", - " inputs=input_layer,\n", - " filters=32,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - "\n", - " # Pooling Layer #1\n", - " pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)\n", - "\n", - " # Convolutional Layer #2 and Pooling Layer #2\n", - " conv2 = tf.layers.conv2d(\n", - " inputs=pool1,\n", - " filters=64,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - " pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)\n", - "\n", - " # Dense Layer\n", - " pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])\n", - " dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)\n", - " dropout = tf.layers.dropout(\n", - " inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)\n", - "\n", - " # Logits Layer\n", - " logits = tf.layers.dense(inputs=dropout, units=10)\n", - "\n", - " predictions = {\n", - " # Generate predictions (for PREDICT and EVAL mode)\n", - " \"classes\": tf.argmax(input=logits, axis=1),\n", - " # Add `softmax_tensor` to the graph. It is used for PREDICT and by the\n", - " # `logging_hook`.\n", - " \"probabilities\": tf.nn.softmax(logits, name=\"softmax_tensor\")\n", - " }\n", - "\n", - " if mode == tf.estimator.ModeKeys.PREDICT:\n", - " return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)\n", - "\n", - " # Calculate Loss (for both TRAIN and EVAL modes)\n", - " loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)\n", - "\n", - " # Configure the Training Op (for TRAIN mode)\n", - " if mode == tf.estimator.ModeKeys.TRAIN:\n", - " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)\n", - " train_op = optimizer.minimize(\n", - " loss=loss,\n", - " global_step=tf.train.get_global_step())\n", - " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", - "\n", - " # Add evaluation metrics (for EVAL mode)\n", - " eval_metric_ops = {\n", - " \"accuracy\": tf.metrics.accuracy(\n", - " labels=labels, predictions=predictions[\"classes\"])\n", - " }\n", - " return tf.estimator.EstimatorSpec(\n", - " mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b7z8qC9FRSLB" - }, - "source": [ - "The following sections (with headings corresponding to each code block above)\n", - "dive deeper into the `tf.layers` code used to create each layer, as well as how\n", - "to calculate loss, configure the training op, and generate predictions. If\n", - "you're already experienced with CNNs and [TensorFlow `Estimator`s](../../guide/custom_estimators.md),\n", - "and find the above code intuitive, you may want to skim these sections or just\n", - "skip ahead to [\"Training and Evaluating the CNN MNIST Classifier\"](#train_eval_mnist)." 
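As a quick orientation before the layer-by-layer sections, here is a summary of the tensor shapes flowing through `cnn_model_fn` for a batch of N 28x28 monochrome images (added here; the shapes are the ones derived in the sections that follow):

```
# Shape walk-through for cnn_model_fn (N = batch size):
# input_layer: [N, 28, 28, 1]
# conv1:       [N, 28, 28, 32]   padding="same" preserves height/width
# pool1:       [N, 14, 14, 32]   2x2 max pooling with stride 2
# conv2:       [N, 14, 14, 64]
# pool2:       [N, 7, 7, 64]
# pool2_flat:  [N, 3136]         since 7 * 7 * 64 == 3136
# dense:       [N, 1024]
# dropout:     [N, 1024]
# logits:      [N, 10]
assert 7 * 7 * 64 == 3136
```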
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sFBXEYRlRUWu" - }, - "source": [ - "### Input Layer\n", - "\n", - "The methods in the `layers` module for creating convolutional and pooling layers\n", - "for two-dimensional image data expect input tensors to have a shape of\n", - "[batch_size, image_height, image_width,\n", - "channels] by default. This behavior can be changed using the\n", - "data_format parameter; defined as follows:\n", - "\n", - "* `batch_size` —Size of the subset of examples to use when performing\n", - " gradient descent during training.\n", - "* `image_height` —Height of the example images.\n", - "* `image_width` —Width of the example images.\n", - "* `channels` —Number of color channels in the example images. For color\n", - " images, the number of channels is 3 (red, green, blue). For monochrome\n", - " images, there is just 1 channel (black).\n", - "* `data_format` —A string, one of `channels_last` (default) or `channels_first`.\n", - " `channels_last` corresponds to inputs with shape\n", - " `(batch, ..., channels)` while `channels_first` corresponds to\n", - " inputs with shape `(batch, channels, ...)`.\n", - "\n", - "Here, our MNIST dataset is composed of monochrome 28x28 pixel images, so the\n", - "desired shape for our input layer is [batch_size, 28, 28,\n", - "1].\n", - "\n", - "To convert our input feature map (`features`) to this shape, we can perform the\n", - "following `reshape` operation:\n", - "\n", - "```\n", - "input_layer = tf.reshape(features[\"x\"], [-1, 28, 28, 1])\n", - "```\n", - "\n", - "Note that we've indicated `-1` for batch size, which specifies that this\n", - "dimension should be dynamically computed based on the number of input values in\n", - "`features[\"x\"]`, holding the size of all other dimensions constant. This allows\n", - "us to treat `batch_size` as a hyperparameter that we can tune. For example, if\n", - "we feed examples into our model in batches of 5, `features[\"x\"]` will contain\n", - "3,920 values (one value for each pixel in each image), and `input_layer` will\n", - "have a shape of `[5, 28, 28, 1]`. Similarly, if we feed examples in batches of\n", - "100, `features[\"x\"]` will contain 78,400 values, and `input_layer` will have a\n", - "shape of `[100, 28, 28, 1]`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iU8Jr1_JRiKA" - }, - "source": [ - "### Convolutional Layer #1\n", - "\n", - "In our first convolutional layer, we want to apply 32 5x5 filters to the input\n", - "layer, with a ReLU activation function. We can use the `conv2d()` method in the\n", - "`layers` module to create this layer as follows:\n", - "\n", - "```\n", - "conv1 = tf.layers.conv2d(\n", - " inputs=input_layer,\n", - " filters=32,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - "```\n", - "\n", - "The `inputs` argument specifies our input tensor, which must have the shape\n", - "[batch_size, image_height, image_width,\n", - "channels]. Here, we're connecting our first convolutional layer\n", - "to `input_layer`, which has the shape [batch_size, 28, 28,\n", - "1].\n", - "\n", - "Note: `conv2d()` will instead accept a shape of `[batch_size, channels, image_height, image_width]` when passed the argument `data_format=channels_first`.\n", - "\n", - "The `filters` argument specifies the number of filters to apply (here, 32), and\n", - "`kernel_size` specifies the dimensions of the filters as `[height,\n", - "width] (here, [5, 5]`).\n", - "\n", - "

    TIP: If filter height and width have the same value, you can instead specify a\n", - "single integer for kernel_size—e.g., kernel_size=5.

    \n", - "\n", - "The `padding` argument specifies one of two enumerated values\n", - "(case-insensitive): `valid` (default value) or `same`. To specify that the\n", - "output tensor should have the same height and width values as the input tensor,\n", - "we set `padding=same` here, which instructs TensorFlow to add 0 values to the\n", - "edges of the input tensor to preserve height and width of 28. (Without padding,\n", - "a 5x5 convolution over a 28x28 tensor will produce a 24x24 tensor, as there are\n", - "24x24 locations to extract a 5x5 tile from a 28x28 grid.)\n", - "\n", - "The `activation` argument specifies the activation function to apply to the\n", - "output of the convolution. Here, we specify ReLU activation with\n", - "`tf.nn.relu`.\n", - "\n", - "Our output tensor produced by `conv2d()` has a shape of\n", - "[batch_size, 28, 28, 32]: the same height and width\n", - "dimensions as the input, but now with 32 channels holding the output from each\n", - "of the filters." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8qzx1ZMFRqt_" - }, - "source": [ - "### Pooling Layer #1\n", - "\n", - "Next, we connect our first pooling layer to the convolutional layer we just\n", - "created. We can use the `max_pooling2d()` method in `layers` to construct a\n", - "layer that performs max pooling with a 2x2 filter and stride of 2:\n", - "\n", - "```\n", - "pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)\n", - "```\n", - "\n", - "Again, `inputs` specifies the input tensor, with a shape of\n", - "[batch_size, image_height, image_width,\n", - "channels]. Here, our input tensor is `conv1`, the output from\n", - "the first convolutional layer, which has a shape of [batch_size,\n", - "28, 28, 32].\n", - "\n", - "Note: As with conv2d(), max_pooling2d() will instead\n", - "accept a shape of [batch_size, channels,\n", - "image_height, image_width] when passed the argument\n", - "data_format=channels_first.\n", - "\n", - "The `pool_size` argument specifies the size of the max pooling filter as\n", - "[height, width] (here, `[2, 2]`). If both\n", - "dimensions have the same value, you can instead specify a single integer (e.g.,\n", - "`pool_size=2`).\n", - "\n", - "The `strides` argument specifies the size of the stride. Here, we set a stride\n", - "of 2, which indicates that the subregions extracted by the filter should be\n", - "separated by 2 pixels in both the height and width dimensions (for a 2x2 filter,\n", - "this means that none of the regions extracted will overlap). If you want to set\n", - "different stride values for height and width, you can instead specify a tuple or\n", - "list (e.g., `stride=[3, 6]`).\n", - "\n", - "Our output tensor produced by `max_pooling2d()` (`pool1`) has a shape of\n", - "[batch_size, 14, 14, 32]: the 2x2 filter reduces height and width by 50% each." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xXej53NlRzFh" - }, - "source": [ - "### Convolutional Layer #2 and Pooling Layer #2\n", - "\n", - "We can connect a second convolutional and pooling layer to our CNN using\n", - "`conv2d()` and `max_pooling2d()` as before. 
For convolutional layer #2, we\n", - "configure 64 5x5 filters with ReLU activation, and for pooling layer #2, we use\n", - "the same specs as pooling layer #1 (a 2x2 max pooling filter with stride of 2):\n", - "\n", - "```\n", - "conv2 = tf.layers.conv2d(\n", - " inputs=pool1,\n", - " filters=64,\n", - " kernel_size=[5, 5],\n", - " padding=\"same\",\n", - " activation=tf.nn.relu)\n", - "\n", - "pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)\n", - "```\n", - "\n", - "Note that convolutional layer #2 takes the output tensor of our first pooling\n", - "layer (`pool1`) as input, and produces the tensor `conv2` as output. `conv2`\n", - "has a shape of [batch_size, 14, 14, 64], the same height and width as `pool1` (due to `padding=\"same\"`), and 64 channels for the 64\n", - "filters applied.\n", - "\n", - "Pooling layer #2 takes `conv2` as input, producing `pool2` as output. `pool2`\n", - "has shape [batch_size, 7, 7, 64] (50% reduction of height and width from `conv2`)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jjmLqVP7R7z6" - }, - "source": [ - "### Dense Layer\n", - "\n", - "Next, we want to add a dense layer (with 1,024 neurons and ReLU activation) to\n", - "our CNN to perform classification on the features extracted by the\n", - "convolution/pooling layers. Before we connect the layer, however, we'll flatten\n", - "our feature map (`pool2`) to shape [batch_size,\n", - "features], so that our tensor has only two dimensions:\n", - "\n", - "```\n", - "pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])\n", - "```\n", - "\n", - "In the `reshape()` operation above, the `-1` signifies that the *`batch_size`*\n", - "dimension will be dynamically calculated based on the number of examples in our\n", - "input data. Each example has 7 (`pool2` height) * 7 (`pool2` width) * 64\n", - "(`pool2` channels) features, so we want the `features` dimension to have a value\n", - "of 7 * 7 * 64 (3136 in total). The output tensor, `pool2_flat`, has shape\n", - "[batch_size, 3136].\n", - "\n", - "Now, we can use the `dense()` method in `layers` to connect our dense layer as\n", - "follows:\n", - "\n", - "```\n", - "dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)\n", - "```\n", - "\n", - "The `inputs` argument specifies the input tensor: our flattened feature map,\n", - "`pool2_flat`. The `units` argument specifies the number of neurons in the dense\n", - "layer (1,024). The `activation` argument takes the activation function; again,\n", - "we'll use `tf.nn.relu` to add ReLU activation.\n", - "\n", - "To help improve the results of our model, we also apply dropout regularization\n", - "to our dense layer, using the `dropout` method in `layers`:\n", - "\n", - "```\n", - "dropout = tf.layers.dropout(\n", - " inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)\n", - "```\n", - "\n", - "Again, `inputs` specifies the input tensor, which is the output tensor from our\n", - "dense layer (`dense`).\n", - "\n", - "The `rate` argument specifies the dropout rate; here, we use `0.4`, which means\n", - "40% of the elements will be randomly dropped out during training.\n", - "\n", - "The `training` argument takes a boolean specifying whether or not the model is\n", - "currently being run in training mode; dropout will only be performed if\n", - "`training` is `True`. 
Here, we check if the `mode` passed to our model function\n", - "`cnn_model_fn` is `TRAIN` mode.\n", - "\n", - "Our output tensor `dropout` has shape [batch_size, 1024]." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rzUcwkCZSTF7" - }, - "source": [ - "### Logits Layer\n", - "\n", - "The final layer in our neural network is the logits layer, which will return the\n", - "raw values for our predictions. We create a dense layer with 10 neurons (one for\n", - "each target class 0–9), with linear activation (the default):\n", - "\n", - "```\n", - "logits = tf.layers.dense(inputs=dropout, units=10)\n", - "```\n", - "\n", - "Our final output tensor of the CNN, `logits`, has shape `[batch_size, 10]`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y3uJ0V1KSakc" - }, - "source": [ - "### Generate Predictions {#generate_predictions}\n", - "\n", - "The logits layer of our model returns our predictions as raw values in a\n", - "[batch_size, 10]-dimensional tensor. Let's convert these\n", - "raw values into two different formats that our model function can return:\n", - "\n", - "* The **predicted class** for each example: a digit from 0–9.\n", - "* The **probabilities** for each possible target class for each example: the\n", - " probability that the example is a 0, is a 1, is a 2, etc.\n", - "\n", - "For a given example, our predicted class is the element in the corresponding row\n", - "of the logits tensor with the highest raw value. We can find the index of this\n", - "element using the `tf.argmax`\n", - "function:\n", - "\n", - "```\n", - "tf.argmax(input=logits, axis=1)\n", - "```\n", - "\n", - "The `input` argument specifies the tensor from which to extract maximum\n", - "values—here `logits`. The `axis` argument specifies the axis of the `input`\n", - "tensor along which to find the greatest value. Here, we want to find the largest\n", - "value along the dimension with index of 1, which corresponds to our predictions\n", - "(recall that our logits tensor has shape [batch_size,\n", - "10]).\n", - "\n", - "We can derive probabilities from our logits layer by applying softmax activation\n", - "using `tf.nn.softmax`:\n", - "\n", - "```\n", - "tf.nn.softmax(logits, name=\"softmax_tensor\")\n", - "```\n", - "\n", - "Note: We use the `name` argument to explicitly name this operation `softmax_tensor`, so we can reference it later. (We'll set up logging for the softmax values in [\"Set Up a Logging Hook\"](#set-up-a-logging-hook)).\n", - "\n", - "We compile our predictions in a dict, and return an `EstimatorSpec` object:\n", - "\n", - "```\n", - "predictions = {\n", - " \"classes\": tf.argmax(input=logits, axis=1),\n", - " \"probabilities\": tf.nn.softmax(logits, name=\"softmax_tensor\")\n", - "}\n", - "if mode == tf.estimator.ModeKeys.PREDICT:\n", - " return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f2ks_tqSSucg" - }, - "source": [ - "### Calculate Loss {#calculating-loss}\n", - "\n", - "For both training and evaluation, we need to define a\n", - "[loss function](https://en.wikipedia.org/wiki/Loss_function)\n", - "that measures how closely the model's predictions match the target classes. For\n", - "multiclass classification problems like MNIST,\n", - "[cross entropy](https://en.wikipedia.org/wiki/Cross_entropy) is typically used\n", - "as the loss metric. 
The following code calculates cross entropy when the model\n", - "runs in either `TRAIN` or `EVAL` mode:\n", - "\n", - "```\n", - "loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)\n", - "```\n", - "\n", - "Let's take a closer look at what's happening above.\n", - "\n", - "Our `labels` tensor contains a list of prediction indices for our examples, e.g. `[1,\n", - "9, ...]`. `logits` contains the linear outputs of our last layer.\n", - "\n", - "`tf.losses.sparse_softmax_cross_entropy`, calculates the softmax crossentropy\n", - "(aka: categorical crossentropy, negative log-likelihood) from these two inputs\n", - "in an efficient, numerically stable way." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YgE7Ll3pS2FG" - }, - "source": [ - "### Configure the Training Op\n", - "\n", - "In the previous section, we defined loss for our CNN as the softmax\n", - "cross-entropy of the logits layer and our labels. Let's configure our model to\n", - "optimize this loss value during training. We'll use a learning rate of 0.001 and\n", - "[stochastic gradient descent](https://en.wikipedia.org/wiki/Stochastic_gradient_descent)\n", - "as the optimization algorithm:\n", - "\n", - "```\n", - "if mode == tf.estimator.ModeKeys.TRAIN:\n", - " optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)\n", - " train_op = optimizer.minimize(\n", - " loss=loss,\n", - " global_step=tf.train.get_global_step())\n", - " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rEJPnXAzS6m9" - }, - "source": [ - "Note: For a more in-depth look at configuring training ops for Estimator model functions, see [\"Defining the training op for the model\"](../../guide/custom_estimators.md#defining-the-training-op-for-the-model) in the [\"Creating Estimations in tf.estimator\"](../../guide/custom_estimators.md) tutorial." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QQuGDWvHTAib" - }, - "source": [ - "### Add evaluation metrics\n", - "\n", - "To add accuracy metric in our model, we define `eval_metric_ops` dict in EVAL\n", - "mode as follows:\n", - "\n", - "```\n", - "eval_metric_ops = {\n", - " \"accuracy\": tf.metrics.accuracy(\n", - " labels=labels, predictions=predictions[\"classes\"])\n", - "}\n", - "return tf.estimator.EstimatorSpec(\n", - " mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y2Bwe-AdTRzX" - }, - "source": [ - "\n", - "## Training and Evaluating the CNN MNIST Classifier\n", - "\n", - "We've coded our MNIST CNN model function; now we're ready to train and evaluate\n", - "it." 
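Before loading any data, a tiny numeric aside can make the loss defined above more concrete. This is an added sketch with made-up logits (the values are illustrative only, not from the tutorial):

```
# What sparse softmax cross entropy computes for one example whose true label is 3:
example_logits = np.array([0.5, 1.2, -0.3, 2.0, 0.1, -1.0, 0.0, 0.3, 0.7, -0.5])
example_label = 3
probabilities = np.exp(example_logits) / np.sum(np.exp(example_logits))
print('Cross-entropy loss:', -np.log(probabilities[example_label]))
```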
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6EC9aOY2TTLU" - }, - "source": [ - "### Load Training and Test Data\n", - "\n", - "First, let's load our training and test data with the following code:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ccobb0qETV-S" - }, - "outputs": [], - "source": [ - "# Load training and eval data\n", - "((train_data, train_labels),\n", - " (eval_data, eval_labels)) = tf.keras.datasets.mnist.load_data()\n", - "\n", - "train_data = train_data/np.float32(255)\n", - "train_labels = train_labels.astype(np.int32) # not required\n", - "\n", - "eval_data = eval_data/np.float32(255)\n", - "eval_labels = eval_labels.astype(np.int32) # not required" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8l84-IxSTZnO" - }, - "source": [ - "We store the training feature data (the raw pixel values for 55,000 images of\n", - "hand-drawn digits) and training labels (the corresponding value from 0–9 for\n", - "each image) as [numpy\n", - "arrays](https://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html)\n", - "in `train_data` and `train_labels`, respectively. Similarly, we store the\n", - "evaluation feature data (10,000 images) and evaluation labels in `eval_data`\n", - "and `eval_labels`, respectively." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "S2_Isc7kTa45" - }, - "source": [ - "### Create the Estimator {#create-the-estimator}\n", - "\n", - "Next, let's create an `Estimator` (a TensorFlow class for performing high-level\n", - "model training, evaluation, and inference) for our model. Add the following code\n", - "to `main()`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yjC6HdwZTdg4" - }, - "outputs": [], - "source": [ - "# Create the Estimator\n", - "mnist_classifier = tf.estimator.Estimator(\n", - " model_fn=cnn_model_fn, model_dir=\"/tmp/mnist_convnet_model\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f78EBcg7TfTU" - }, - "source": [ - "The `model_fn` argument specifies the model function to use for training,\n", - "evaluation, and prediction; we pass it the `cnn_model_fn` we created in\n", - "[\"Building the CNN MNIST Classifier.\"](#building-the-cnn-mnist-classifier) The\n", - "`model_dir` argument specifies the directory where model data (checkpoints) will\n", - "be saved (here, we specify the temp directory `/tmp/mnist_convnet_model`, but\n", - "feel free to change to another directory of your choice).\n", - "\n", - "Note: For an in-depth walkthrough of the TensorFlow `Estimator` API, see the tutorial [Creating Estimators in tf.estimator](../../guide/custom_estimators.md)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_6ow7hVYTm3f" - }, - "source": [ - "### Set Up a Logging Hook {#set_up_a_logging_hook}\n", - "\n", - "Since CNNs can take a while to train, let's set up some logging so we can track\n", - "progress during training. We can use TensorFlow's `tf.train.SessionRunHook` to create a\n", - "`tf.train.LoggingTensorHook`\n", - "that will log the probability values from the softmax layer of our CNN. 
Add the\n", - "following to `main()`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "S6T10kssTpdz" - }, - "outputs": [], - "source": [ - "# Set up logging for predictions\n", - "tensors_to_log = {\"probabilities\": \"softmax_tensor\"}\n", - "\n", - "logging_hook = tf.train.LoggingTensorHook(\n", - " tensors=tensors_to_log, every_n_iter=50)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RZdtZ6JQTsmg" - }, - "source": [ - "We store a dict of the tensors we want to log in `tensors_to_log`. Each key is a\n", - "label of our choice that will be printed in the log output, and the\n", - "corresponding value is the name of a `Tensor` in the TensorFlow graph. Here, our\n", - "`probabilities` can be found in `softmax_tensor`, the name we gave our softmax\n", - "operation earlier when we generated the probabilities in `cnn_model_fn`.\n", - "\n", - "Note: If you don't explicitly assign a name to an operation via the `name` argument, TensorFlow will assign a default name. A couple of easy ways to discover the names applied to operations are to visualize your graph on [TensorBoard](../../guide/graph_viz.md) or to enable the [TensorFlow Debugger (tfdbg)](../../guide/debugger.md).\n", - "\n", - "Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the\n", - "`tensors` argument. We set `every_n_iter=50`, which specifies that probabilities\n", - "should be logged after every 50 steps of training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "brVs1dRMT0NM" - }, - "source": [ - "### Train the Model\n", - "\n", - "Now we're ready to train our model, which we can do by creating `train_input_fn`\n", - "and calling `train()` on `mnist_classifier`. In the `numpy_input_fn` call, we pass the training feature data and labels to\n", - "`x` (as a dict) and `y`, respectively. We set a `batch_size` of `100` (which\n", - "means that the model will train on minibatches of 100 examples at each step).\n", - "`num_epochs=None` means that the model will train until the specified number of\n", - "steps is reached. We also set `shuffle=True` to shuffle the training data. Then train the model a single step and log the output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "h-dewpleT2sk" - }, - "outputs": [], - "source": [ - "# Train the model\n", - "train_input_fn = tf.estimator.inputs.numpy_input_fn(\n", - " x={\"x\": train_data},\n", - " y=train_labels,\n", - " batch_size=100,\n", - " num_epochs=None,\n", - " shuffle=True)\n", - "\n", - "# train one step and display the probabilities\n", - "mnist_classifier.train(\n", - " input_fn=train_input_fn,\n", - " steps=1,\n", - " hooks=[logging_hook])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gyNSE3e-14Lq" - }, - "source": [ - "Now, without logging each step, set `steps=1000` to train the model longer while keeping this example's runtime reasonable. Training CNNs is computationally intensive. To increase the accuracy of your model, increase the number of `steps` passed to `train()`, for example to 20,000 steps."
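The string `"softmax_tensor"` in `tensors_to_log` refers to a graph name, not a Python variable, so the hook only works because the softmax op was created with that explicit name earlier in `cnn_model_fn`. As a reminder of the naming pattern (a one-line sketch of the earlier code, not something new to add):

```python
# Naming the op makes it addressable as "softmax_tensor" by hooks and TensorBoard.
probabilities = tf.nn.softmax(logits, name="softmax_tensor")
```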
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cri6zqcf2IXY" - }, - "outputs": [], - "source": [ - "mnist_classifier.train(input_fn=train_input_fn, steps=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4bQdkLMeUE5U" - }, - "source": [ - "### Evaluate the Model\n", - "\n", - "Once training is complete, we want to evaluate our model to determine its\n", - "accuracy on the MNIST test set. We call the `evaluate` method, which evaluates\n", - "the metrics we specified in `eval_metric_ops` argument in the `model_fn`.\n", - "Add the following to `main()`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "I0RGiqd0UF0N" - }, - "outputs": [], - "source": [ - "eval_input_fn = tf.estimator.inputs.numpy_input_fn(\n", - " x={\"x\": eval_data},\n", - " y=eval_labels,\n", - " num_epochs=1,\n", - " shuffle=False)\n", - "\n", - "eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)\n", - "print(eval_results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JIBVID6dUIXT" - }, - "source": [ - "To create `eval_input_fn`, we set `num_epochs=1`, so that the model evaluates\n", - "the metrics over one epoch of data and returns the result. We also set\n", - "`shuffle=False` to iterate through the data sequentially." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "htmLZ-zEUZZk" - }, - "source": [ - "## Additional Resources\n", - "\n", - "To learn more about TensorFlow Estimators and CNNs in TensorFlow, see the\n", - "following resources:\n", - "\n", - "* [Creating Estimators in tf.estimator](../../guide/custom_estimators.md)\n", - " provides an introduction to the TensorFlow Estimator API. It walks through\n", - " configuring an Estimator, writing a model function, calculating loss, and\n", - " defining a training op.\n", - "* [Advanced Convolutional Neural Networks](../../tutorials/images/deep_cnn.md) walks through how to build a MNIST CNN classification model\n", - " *without estimators* using lower-level TensorFlow operations." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "Tce3stUlHN0L" - ], - "name": "cnn.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/estimators/linear.ipynb b/site/en/r1/tutorials/estimators/linear.ipynb deleted file mode 100644 index 4155e0974a1..00000000000 --- a/site/en/r1/tutorials/estimators/linear.ipynb +++ /dev/null @@ -1,1260 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "MWW1TyjaecRh" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "mOtR1FzCef-u" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Zr7KpBhMcYvE" - }, - "source": [ - "# Build a linear model with Estimators" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uJl4gaPFzxQz" - }, - "source": [ - "\n", - " \n", - " \n", - "
    \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uJl4gaPFzxQy" - }, - "source": [ - "> Note: This is an archived TF1 notebook. These are configured\n", - "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", - "but will run in TF1 as well. To use TF1 in Colab, use the\n", - "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", - "magic." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77aETSYDcdoK" - }, - "source": [ - "This tutorial uses the `tf.estimator` API in TensorFlow to solve a benchmark binary classification problem. Estimators are TensorFlow's most scalable and production-oriented model type. For more information see the [Estimator guide](https://www.tensorflow.org/r1/guide/estimators).\n", - "\n", - "## Overview\n", - "\n", - "Using census data which contains data about a person's age, education, marital status, and occupation (the *features*), you will try to predict whether or not the person earns more than 50,000 dollars a year (the target *label*). You will train a *logistic regression* model that, given an individual's information, outputs a number between 0 and 1—this can be interpreted as the probability that the individual has an annual income of over 50,000 dollars.\n", - "\n", - "Key Point: As a modeler and developer, think about how this data is used and the potential benefits and harm a model's predictions can cause. A model like this could reinforce societal biases and disparities. Is each feature relevant to the problem you want to solve or will it introduce bias? For more information, read about [ML fairness](https://developers.google.com/machine-learning/fairness-overview/).\n", - "\n", - "## Setup\n", - "\n", - "Import TensorFlow, feature column support, and supporting modules:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NQgONe5ecYvE" - }, - "outputs": [], - "source": [ - "import tensorflow.compat.v1 as tf\n", - "\n", - "import tensorflow.feature_column as fc\n", - "\n", - "import os\n", - "import sys\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from IPython.display import clear_output" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Rpb1JSMj1nqk" - }, - "source": [ - "And let's enable [eager execution](https://www.tensorflow.org/r1/guide/eager) to inspect this program as you run it:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-MPr95UccYvL" - }, - "source": [ - "## Download the official implementation\n", - "\n", - "You will use the [wide and deep model](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/) available in TensorFlow's [model repository](https://github.com/tensorflow/models/). Download the code, add the root directory to your Python path, and jump to the `wide_deep` directory:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tTwQzWcn8aBu" - }, - "outputs": [], - "source": [ - "! pip install requests\n", - "! 
git clone --depth 1 --branch r2.1.0 https://github.com/tensorflow/models" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sRpuysc73Eb-" - }, - "source": [ - "Add the root directory of the repository to your Python path:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yVvFyhnkcYvL" - }, - "outputs": [], - "source": [ - "models_path = os.path.join(os.getcwd(), 'models')\n", - "\n", - "sys.path.append(models_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "15Ethw-wcYvP" - }, - "source": [ - "Download the dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6QilS4-0cYvQ" - }, - "outputs": [], - "source": [ - "from official.r1.wide_deep import census_dataset\n", - "from official.r1.wide_deep import census_main\n", - "\n", - "census_dataset.download(\"/tmp/census_data/\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cD5e3ibAcYvS" - }, - "source": [ - "### Command line usage\n", - "\n", - "The repo includes a complete program for experimenting with this type of model.\n", - "\n", - "To execute the tutorial code from the command line first add the path to tensorflow/models to your `PYTHONPATH`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DYOkY8boUptJ" - }, - "outputs": [], - "source": [ - "#export PYTHONPATH=${PYTHONPATH}:\"$(pwd)/models\"\n", - "#running from python you need to set the `os.environ` or the subprocess will not see the directory.\n", - "\n", - "if \"PYTHONPATH\" in os.environ:\n", - " os.environ['PYTHONPATH'] += os.pathsep + models_path\n", - "else:\n", - " os.environ['PYTHONPATH'] = models_path" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5r0V9YUMUyoh" - }, - "source": [ - "Use `--help` to see what command line options are available:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1_3tBaLW4YM4" - }, - "outputs": [], - "source": [ - "!python -m official.r1.wide_deep.census_main --help" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RrMLazEN6DMj" - }, - "source": [ - "Now run the model:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "py7MarZl5Yh6" - }, - "outputs": [], - "source": [ - "!python -m official.r1.wide_deep.census_main --model_type=wide --train_epochs=2" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AmZ4CpaOcYvV" - }, - "source": [ - "## Read the U.S. Census data\n", - "\n", - "This example uses the [U.S Census Income Dataset](https://archive.ics.uci.edu/ml/datasets/Census+Income) from 1994 and 1995. The [census_dataset.py](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/census_dataset.py) script is provided to download the data and perform a little cleanup.\n", - "\n", - "Since the task is a *binary classification problem*, you will construct a label column named \"label\" whose value is 1 if the income is over 50K, and 0 otherwise. 
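As a rough illustration of that label construction, the raw `income_bracket` strings can be thresholded with pandas. This is a sketch of the idea only, not the actual code in the repository's `input_fn`, which also handles details such as the trailing period on labels in the test file:

```python
import pandas as pd

# Sketch: derive a 0/1 label from the raw income_bracket strings.
raw_df = pd.read_csv("/tmp/census_data/adult.data",
                     names=census_dataset._CSV_COLUMNS)
raw_df["label"] = raw_df["income_bracket"].str.contains(">50K").astype(int)
print(raw_df[["income_bracket", "label"]].head())
```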
For reference, see the `input_fn` in [census_main.py](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/census_main.py).\n", - "\n", - "Let's look at the data to see which columns you can use to predict the target label:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N6Tgye8bcYvX" - }, - "outputs": [], - "source": [ - "!ls /tmp/census_data/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6y3mj9zKcYva" - }, - "outputs": [], - "source": [ - "train_file = \"/tmp/census_data/adult.data\"\n", - "test_file = \"/tmp/census_data/adult.test\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EO_McKgE5il2" - }, - "source": [ - "[pandas](https://pandas.pydata.org/) provides some convenient utilities for data analysis. Here's a list of columns available in the Census Income dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vkn1FNmpcYvb" - }, - "outputs": [], - "source": [ - "import pandas\n", - "\n", - "train_df = pandas.read_csv(train_file, names=census_dataset._CSV_COLUMNS)\n", - "test_df = pandas.read_csv(test_file, names=census_dataset._CSV_COLUMNS)\n", - "\n", - "train_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QZZtXes4cYvf" - }, - "source": [ - "The columns are grouped into two types: *categorical* and *continuous* columns:\n", - "\n", - "* A column is called *categorical* if its value can only be one of the categories in a finite set. For example, the relationship status of a person (wife, husband, unmarried, etc.) or the education level (high school, college, etc.) are categorical columns.\n", - "* A column is called *continuous* if its value can be any numerical value in a continuous range. For example, the capital gain of a person (e.g. $14,084) is a continuous column.\n", - "\n", - "## Converting Data into Tensors\n", - "\n", - "When building a `tf.estimator` model, the input data is specified by using an *input function* (or `input_fn`). This builder function returns a `tf.data.Dataset` of batches of `(features-dict, label)` pairs. It is not called until it is passed to `tf.estimator.Estimator` methods such as `train` and `evaluate`.\n", - "\n", - "The input builder function returns the following pair:\n", - "\n", - "1. `features`: A dict from feature names to `Tensors` or `SparseTensors` containing batches of features.\n", - "2. `labels`: A `Tensor` containing batches of labels.\n", - "\n", - "The keys of the `features` are used to configure the model's input layer.\n", - "\n", - "Note: The input function is called while constructing the TensorFlow graph, *not* while running the graph. 
It returns a representation of the input data as a sequence of TensorFlow graph operations.\n", - "\n", - "For small problems like this, it's easy to make a `tf.data.Dataset` by slicing the `pandas.DataFrame`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N7zNJflKcYvg" - }, - "outputs": [], - "source": [ - "def easy_input_function(df, label_key, num_epochs, shuffle, batch_size):\n", - " label = df[label_key]\n", - " ds = tf.data.Dataset.from_tensor_slices((dict(df),label))\n", - "\n", - " if shuffle:\n", - " ds = ds.shuffle(10000)\n", - "\n", - " ds = ds.batch(batch_size).repeat(num_epochs)\n", - "\n", - " return ds" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WeEgNR9AcYvh" - }, - "source": [ - "Since you have eager execution enabled, it's easy to inspect the resulting dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ygaKuikecYvi" - }, - "outputs": [], - "source": [ - "ds = easy_input_function(train_df, label_key='income_bracket', num_epochs=5, shuffle=True, batch_size=10)\n", - "\n", - "for feature_batch, label_batch in ds.take(1):\n", - " print('Some feature keys:', list(feature_batch.keys())[:5])\n", - " print()\n", - " print('A batch of Ages :', feature_batch['age'])\n", - " print()\n", - " print('A batch of Labels:', label_batch )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "O_KZxQUucYvm" - }, - "source": [ - "But this approach has severely limited scalability. Larger datasets should be streamed from disk. The `census_dataset.input_fn` provides an example of how to do this using `tf.decode_csv` and `tf.data.TextLineDataset`:\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vUTeXaEUcYvn" - }, - "outputs": [], - "source": [ - "import inspect\n", - "print(inspect.getsource(census_dataset.input_fn))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yyGcv_e-cYvq" - }, - "source": [ - "This `input_fn` returns equivalent output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Mv3as_CEcYvu" - }, - "outputs": [], - "source": [ - "ds = census_dataset.input_fn(train_file, num_epochs=5, shuffle=True, batch_size=10)\n", - "\n", - "for feature_batch, label_batch in ds.take(1):\n", - " print('Feature keys:', list(feature_batch.keys())[:5])\n", - " print()\n", - " print('Age batch :', feature_batch['age'])\n", - " print()\n", - " print('Label batch :', label_batch )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "810fnfY5cYvz" - }, - "source": [ - "Because `Estimators` expect an `input_fn` that takes no arguments, you typically wrap a configurable input function into an object with the expected signature."
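For example, a small wrapper (or a lambda) is enough to turn the parameterized `census_dataset.input_fn` into the zero-argument callable an `Estimator` expects; the next cell does the same thing more compactly with `functools.partial`. A sketch, where the `train_inpf_example` name is only for illustration:

```python
# One way to get a no-argument input_fn: close over the configuration.
def make_input_fn(data_file, num_epochs, shuffle, batch_size):
  def input_fn():
    return census_dataset.input_fn(
        data_file, num_epochs=num_epochs, shuffle=shuffle, batch_size=batch_size)
  return input_fn

train_inpf_example = make_input_fn(train_file, num_epochs=2, shuffle=True, batch_size=64)
```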
For this notebook configure the `train_inpf` to iterate over the data twice:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wnQdpEcVcYv0" - }, - "outputs": [], - "source": [ - "import functools\n", - "\n", - "train_inpf = functools.partial(census_dataset.input_fn, train_file, num_epochs=2, shuffle=True, batch_size=64)\n", - "test_inpf = functools.partial(census_dataset.input_fn, test_file, num_epochs=1, shuffle=False, batch_size=64)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pboNpNWhcYv4" - }, - "source": [ - "## Selecting and Engineering Features for the Model\n", - "\n", - "Estimators use a system called [feature columns](https://www.tensorflow.org/r1/guide/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and feature columns describe how the model should convert each feature.\n", - "\n", - "Selecting and crafting the right set of feature columns is key to learning an effective model. A *feature column* can be either one of the raw inputs in the original features `dict` (a *base feature column*), or any new columns created using transformations defined over one or multiple base columns (a *derived feature columns*).\n", - "\n", - "A feature column is an abstract concept of any raw or derived variable that can be used to predict the target label." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_hh-cWdU__Lq" - }, - "source": [ - "### Base Feature Columns" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BKz6LA8_ACI7" - }, - "source": [ - "#### Numeric columns\n", - "\n", - "The simplest `feature_column` is `numeric_column`. This indicates that a feature is a numeric value that should be input to the model directly. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZX0r2T5OcYv6" - }, - "outputs": [], - "source": [ - "age = fc.numeric_column('age')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tnLUiaHxcYv-" - }, - "source": [ - "The model will use the `feature_column` definitions to build the model input. 
You can inspect the resulting output using the `input_layer` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kREtIPfwcYv_" - }, - "outputs": [], - "source": [ - "fc.input_layer(feature_batch, [age]).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OPuLduCucYwD" - }, - "source": [ - "The following will train and evaluate a model using only the `age` feature:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9R5eSJ1pcYwE" - }, - "outputs": [], - "source": [ - "classifier = tf.estimator.LinearClassifier(feature_columns=[age])\n", - "classifier.train(train_inpf)\n", - "result = classifier.evaluate(test_inpf)\n", - "\n", - "clear_output() # used for display in notebook\n", - "print(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YDZGcdTdcYwI" - }, - "source": [ - "Similarly, you can define a `NumericColumn` for each continuous feature column\n", - "that you want to use in the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uqPbUqlxcYwJ" - }, - "outputs": [], - "source": [ - "education_num = tf.feature_column.numeric_column('education_num')\n", - "capital_gain = tf.feature_column.numeric_column('capital_gain')\n", - "capital_loss = tf.feature_column.numeric_column('capital_loss')\n", - "hours_per_week = tf.feature_column.numeric_column('hours_per_week')\n", - "\n", - "my_numeric_columns = [age,education_num, capital_gain, capital_loss, hours_per_week]\n", - "\n", - "fc.input_layer(feature_batch, my_numeric_columns).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cBGDN97IcYwQ" - }, - "source": [ - "You could retrain a model on these features by changing the `feature_columns` argument to the constructor:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XN8k5S95cYwR" - }, - "outputs": [], - "source": [ - "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns)\n", - "classifier.train(train_inpf)\n", - "\n", - "result = classifier.evaluate(test_inpf)\n", - "\n", - "clear_output()\n", - "\n", - "for key,value in sorted(result.items()):\n", - " print('%s: %s' % (key, value))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jBRq9_AzcYwU" - }, - "source": [ - "#### Categorical columns\n", - "\n", - "To define a feature column for a categorical feature, create a `CategoricalColumn` using one of the `tf.feature_column.categorical_column*` functions.\n", - "\n", - "If you know the set of all possible feature values of a column—and there are only a few of them—use `categorical_column_with_vocabulary_list`. Each key in the list is assigned an auto-incremented ID starting from 0. For example, for the `relationship` column you can assign the feature string `Husband` to an integer ID of 0 and \"Not-in-family\" to 1, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0IjqSi9tcYwV" - }, - "outputs": [], - "source": [ - "relationship = fc.categorical_column_with_vocabulary_list(\n", - " 'relationship',\n", - " ['Husband', 'Not-in-family', 'Wife', 'Own-child', 'Unmarried', 'Other-relative'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-RjoWv-7cYwW" - }, - "source": [ - "This creates a sparse one-hot vector from the raw input feature.\n", - "\n", - "The `input_layer` function you are using is designed for DNN models and expects dense inputs. 
To demonstrate the categorical column you must wrap it in a `tf.feature_column.indicator_column` to create the dense one-hot output (Linear `Estimators` can often skip this dense-step).\n", - "\n", - "Note: the other sparse-to-dense option is `tf.feature_column.embedding_column`.\n", - "\n", - "Run the input layer, configured with both the `age` and `relationship` columns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kI43CYlncYwY" - }, - "outputs": [], - "source": [ - "fc.input_layer(feature_batch, [age, fc.indicator_column(relationship)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tTudP7WHcYwb" - }, - "source": [ - "If you don't know the set of possible values in advance, use the `categorical_column_with_hash_bucket` instead:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8pSBaliCcYwb" - }, - "outputs": [], - "source": [ - "occupation = tf.feature_column.categorical_column_with_hash_bucket(\n", - " 'occupation', hash_bucket_size=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fSAPrqQkcYwd" - }, - "source": [ - "Here, each possible value in the feature column `occupation` is hashed to an integer ID as you encounter them in training. The example batch has a few different occupations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dCvQNv36cYwe" - }, - "outputs": [], - "source": [ - "for item in feature_batch['occupation'].numpy():\n", - " print(item.decode())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KP5hN2rAcYwh" - }, - "source": [ - "If you run `input_layer` with the hashed column, you see that the output shape is `(batch_size, hash_bucket_size)`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0Y16peWacYwh" - }, - "outputs": [], - "source": [ - "occupation_result = fc.input_layer(feature_batch, [fc.indicator_column(occupation)])\n", - "\n", - "occupation_result.numpy().shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HMW2MzWAcYwk" - }, - "source": [ - "It's easier to see the actual results if you take the `tf.argmax` over the `hash_bucket_size` dimension. Notice how any duplicate occupations are mapped to the same pseudo-random index:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "q_ryRglmcYwk" - }, - "outputs": [], - "source": [ - "tf.argmax(occupation_result, axis=1).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j1e5NfyKcYwn" - }, - "source": [ - "Note: Hash collisions are unavoidable, but often have minimal impact on model quality. The effect may be noticable if the hash buckets are being used to compress the input space.\n", - "\n", - "No matter how you choose to define a `SparseColumn`, each feature string is mapped into an integer ID by looking up a fixed mapping or by hashing. Under the hood, the `LinearModel` class is responsible for managing the mapping and creating `tf.Variable` to store the model parameters (model *weights*) for each feature ID. 
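Under the hood, `categorical_column_with_hash_bucket` assigns those IDs with a deterministic string hash, which is why equal strings always share a bucket. You can reproduce the effect directly with a string-hashing op; this is a small sketch, and the exact IDs are not guaranteed to match what `input_layer` produces on every TensorFlow version:

```python
# Equal strings always map to the same bucket; unrelated strings may collide.
tf.strings.to_hash_bucket_fast(
    ["Exec-managerial", "Craft-repair", "Exec-managerial"], num_buckets=1000)
```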
The model parameters are learned through the model training process described later.\n", - "\n", - "Let's do the similar trick to define the other categorical features:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0Z5eUrd_cYwo" - }, - "outputs": [], - "source": [ - "education = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " 'education', [\n", - " 'Bachelors', 'HS-grad', '11th', 'Masters', '9th', 'Some-college',\n", - " 'Assoc-acdm', 'Assoc-voc', '7th-8th', 'Doctorate', 'Prof-school',\n", - " '5th-6th', '10th', '1st-4th', 'Preschool', '12th'])\n", - "\n", - "marital_status = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " 'marital_status', [\n", - " 'Married-civ-spouse', 'Divorced', 'Married-spouse-absent',\n", - " 'Never-married', 'Separated', 'Married-AF-spouse', 'Widowed'])\n", - "\n", - "workclass = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " 'workclass', [\n", - " 'Self-emp-not-inc', 'Private', 'State-gov', 'Federal-gov',\n", - " 'Local-gov', '?', 'Self-emp-inc', 'Without-pay', 'Never-worked'])\n", - "\n", - "\n", - "my_categorical_columns = [relationship, occupation, education, marital_status, workclass]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ASQJM1pEcYwr" - }, - "source": [ - "It's easy to use both sets of columns to configure a model that uses all these features:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_i_MLoo9cYws" - }, - "outputs": [], - "source": [ - "classifier = tf.estimator.LinearClassifier(feature_columns=my_numeric_columns+my_categorical_columns)\n", - "classifier.train(train_inpf)\n", - "result = classifier.evaluate(test_inpf)\n", - "\n", - "clear_output()\n", - "\n", - "for key,value in sorted(result.items()):\n", - " print('%s: %s' % (key, value))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zdKEqF6xcYwv" - }, - "source": [ - "### Derived feature columns" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RgYaf_48FSU2" - }, - "source": [ - "#### Make Continuous Features Categorical through Bucketization\n", - "\n", - "Sometimes the relationship between a continuous feature and the label is not linear. For example, *age* and *income*—a person's income may grow in the early stage of their career, then the growth may slow at some point, and finally, the income decreases after retirement. In this scenario, using the raw `age` as a real-valued feature column might not be a good choice because the model can only learn one of the three cases:\n", - "\n", - "1. Income always increases at some rate as age grows (positive correlation),\n", - "2. Income always decreases at some rate as age grows (negative correlation), or\n", - "3. Income stays the same no matter at what age (no correlation).\n", - "\n", - "If you want to learn the fine-grained correlation between income and each age group separately, you can leverage *bucketization*. Bucketization is a process of dividing the entire range of a continuous feature into a set of consecutive buckets, and then converting the original numerical feature into a bucket ID (as a categorical feature) depending on which bucket that value falls into. 
So, you can define a `bucketized_column` over `age` as:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KT4pjD9AcYww" - }, - "outputs": [], - "source": [ - "age_buckets = tf.feature_column.bucketized_column(\n", - " age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "S-XOscrEcYwx" - }, - "source": [ - "`boundaries` is a list of bucket boundaries. In this case, there are 10 boundaries, resulting in 11 age group buckets (from age 17 and below, 18-24, 25-29, ..., to 65 and over).\n", - "\n", - "With bucketing, the model sees each bucket as a one-hot feature:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Lr40vm3qcYwy" - }, - "outputs": [], - "source": [ - "fc.input_layer(feature_batch, [age, age_buckets]).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z_tQI9j8cYw1" - }, - "source": [ - "#### Learn complex relationships with crossed column\n", - "\n", - "Using each base feature column separately may not be enough to explain the data. For example, the correlation between education and the label (earning > 50,000 dollars) may be different for different occupations. Therefore, if you only learn a single model weight for `education=\"Bachelors\"` and `education=\"Masters\"`, you won't capture every education-occupation combination (e.g. distinguishing between `education=\"Bachelors\"` AND `occupation=\"Exec-managerial\"` AND `education=\"Bachelors\" AND occupation=\"Craft-repair\"`).\n", - "\n", - "To learn the differences between different feature combinations, you can add *crossed feature columns* to the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IAPhPzXscYw1" - }, - "outputs": [], - "source": [ - "education_x_occupation = tf.feature_column.crossed_column(\n", - " ['education', 'occupation'], hash_bucket_size=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UeTxMunbcYw5" - }, - "source": [ - "You can also create a `crossed_column` over more than two columns. Each constituent column can be either a base feature column that is categorical (`SparseColumn`), a bucketized real-valued feature column, or even another `CrossColumn`. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "y8UaBld9cYw7" - }, - "outputs": [], - "source": [ - "age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(\n", - " [age_buckets, 'education', 'occupation'], hash_bucket_size=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HvKmW6U5cYw8" - }, - "source": [ - "These crossed columns always use hash buckets to avoid the exponential explosion in the number of categories, and put the control over number of model weights in the hands of the user.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HtjpheB6cYw9" - }, - "source": [ - "## Define the logistic regression model\n", - "\n", - "After processing the input data and defining all the feature columns, you can put them together and build a *logistic regression* model. 
The previous section showed several types of base and derived feature columns, including:\n", - "\n", - "* `CategoricalColumn`\n", - "* `NumericColumn`\n", - "* `BucketizedColumn`\n", - "* `CrossedColumn`\n", - "\n", - "All of these are subclasses of the abstract `FeatureColumn` class and can be added to the `feature_columns` field of a model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Klmf3OxpcYw-" - }, - "outputs": [], - "source": [ - "import tempfile\n", - "\n", - "base_columns = [\n", - " education, marital_status, relationship, workclass, occupation,\n", - " age_buckets,\n", - "]\n", - "\n", - "crossed_columns = [\n", - " tf.feature_column.crossed_column(\n", - " ['education', 'occupation'], hash_bucket_size=1000),\n", - " tf.feature_column.crossed_column(\n", - " [age_buckets, 'education', 'occupation'], hash_bucket_size=1000),\n", - "]\n", - "\n", - "model = tf.estimator.LinearClassifier(\n", - " model_dir=tempfile.mkdtemp(),\n", - " feature_columns=base_columns + crossed_columns,\n", - " optimizer=tf.train.FtrlOptimizer(learning_rate=0.1))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jRhnPxUucYxC" - }, - "source": [ - "The model automatically learns a bias term, which controls the prediction made without observing any features. The learned model files are stored in `model_dir`.\n", - "\n", - "## Train and evaluate the model\n", - "\n", - "After adding all the features to the model, let's train the model. Training a model is just a single command using the `tf.estimator` API:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZlrIBuoecYxD" - }, - "outputs": [], - "source": [ - "train_inpf = functools.partial(census_dataset.input_fn, train_file,\n", - " num_epochs=40, shuffle=True, batch_size=64)\n", - "\n", - "model.train(train_inpf)\n", - "\n", - "clear_output() # used for notebook display" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IvY3a9pzcYxH" - }, - "source": [ - "After the model is trained, evaluate the accuracy of the model by predicting the labels of the holdout data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L9nVJEO8cYxI" - }, - "outputs": [], - "source": [ - "results = model.evaluate(test_inpf)\n", - "\n", - "clear_output()\n", - "\n", - "for key,value in sorted(results.items()):\n", - " print('%s: %0.2f' % (key, value))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E0fAibNDcYxL" - }, - "source": [ - "The first line of the output should display something like: `accuracy: 0.84`, which means the accuracy is 84%. 
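If you prefer to drive the same train-then-evaluate cycle with a single call, `tf.estimator.train_and_evaluate` accepts the estimator plus a `TrainSpec` and an `EvalSpec`. A minimal sketch using the input functions defined earlier; the 5,000-step cap is arbitrary, and this is an optional alternative rather than what this notebook does:

```python
# Optional: one call that runs training with periodic evaluation.
train_spec = tf.estimator.TrainSpec(input_fn=train_inpf, max_steps=5000)
eval_spec = tf.estimator.EvalSpec(input_fn=test_inpf)

tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
```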
You can try using more features and transformations to see if you can do better!\n", - "\n", - "After the model is evaluated, you can use it to predict whether an individual has an annual income of over 50,000 dollars given an individual's information input.\n", - "\n", - "Let's look in more detail how the model performed:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8R5bz5CxcYxL" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "predict_df = test_df[:20].copy()\n", - "\n", - "pred_iter = model.predict(\n", - " lambda:easy_input_function(predict_df, label_key='income_bracket',\n", - " num_epochs=1, shuffle=False, batch_size=10))\n", - "\n", - "classes = np.array(['<=50K', '>50K'])\n", - "pred_class_id = []\n", - "\n", - "for pred_dict in pred_iter:\n", - " pred_class_id.append(pred_dict['class_ids'])\n", - "\n", - "predict_df['predicted_class'] = classes[np.array(pred_class_id)]\n", - "predict_df['correct'] = predict_df['predicted_class'] == predict_df['income_bracket']\n", - "\n", - "clear_output()\n", - "\n", - "predict_df[['income_bracket','predicted_class', 'correct']]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "N_uCpFTicYxN" - }, - "source": [ - "For a working end-to-end example, download our [example code](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep/census_main.py) and set the `model_type` flag to `wide`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oyKy1lM_3gkL" - }, - "source": [ - "## Adding Regularization to Prevent Overfitting\n", - "\n", - "Regularization is a technique used to avoid overfitting. Overfitting happens when a model performs well on the data it is trained on, but worse on test data that the model has not seen before. Overfitting can occur when a model is excessively complex, such as having too many parameters relative to the number of observed training data. Regularization allows you to control the model's complexity and make the model more generalizable to unseen data.\n", - "\n", - "You can add L1 and L2 regularizations to the model with the following code:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lzMUSBQ03hHx" - }, - "outputs": [], - "source": [ - "model_l1 = tf.estimator.LinearClassifier(\n", - " feature_columns=base_columns + crossed_columns,\n", - " optimizer=tf.train.FtrlOptimizer(\n", - " learning_rate=0.1,\n", - " l1_regularization_strength=10.0,\n", - " l2_regularization_strength=0.0))\n", - "\n", - "model_l1.train(train_inpf)\n", - "\n", - "results = model_l1.evaluate(test_inpf)\n", - "clear_output()\n", - "for key in sorted(results):\n", - " print('%s: %0.2f' % (key, results[key]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ofmPL212JIy2" - }, - "outputs": [], - "source": [ - "model_l2 = tf.estimator.LinearClassifier(\n", - " feature_columns=base_columns + crossed_columns,\n", - " optimizer=tf.train.FtrlOptimizer(\n", - " learning_rate=0.1,\n", - " l1_regularization_strength=0.0,\n", - " l2_regularization_strength=10.0))\n", - "\n", - "model_l2.train(train_inpf)\n", - "\n", - "results = model_l2.evaluate(test_inpf)\n", - "clear_output()\n", - "for key in sorted(results):\n", - " print('%s: %0.2f' % (key, results[key]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Lp1Rfy_k4e7w" - }, - "source": [ - "These regularized models don't perform much better than the base model. 
Let's look at the model's weight distributions to better see the effect of the regularization:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Wb6093N04XlS" - }, - "outputs": [], - "source": [ - "def get_flat_weights(model):\n", - " weight_names = [\n", - " name for name in model.get_variable_names()\n", - " if \"linear_model\" in name and \"Ftrl\" not in name]\n", - "\n", - " weight_values = [model.get_variable_value(name) for name in weight_names]\n", - "\n", - " weights_flat = np.concatenate([item.flatten() for item in weight_values], axis=0)\n", - "\n", - " return weights_flat\n", - "\n", - "weights_flat = get_flat_weights(model)\n", - "weights_flat_l1 = get_flat_weights(model_l1)\n", - "weights_flat_l2 = get_flat_weights(model_l2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GskJmtfmL0p-" - }, - "source": [ - "The models have many zero-valued weights caused by unused hash bins (there are many more hash bins than categories in some columns). You can mask these weights when viewing the weight distributions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rM3agZe3MT3D" - }, - "outputs": [], - "source": [ - "weight_mask = weights_flat != 0\n", - "\n", - "weights_base = weights_flat[weight_mask]\n", - "weights_l1 = weights_flat_l1[weight_mask]\n", - "weights_l2 = weights_flat_l2[weight_mask]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NqBpxLLQNEBE" - }, - "source": [ - "Now plot the distributions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IdFK7wWa5_0K" - }, - "outputs": [], - "source": [ - "plt.figure()\n", - "_ = plt.hist(weights_base, bins=np.linspace(-3,3,30))\n", - "plt.title('Base Model')\n", - "plt.ylim([0,500])\n", - "\n", - "plt.figure()\n", - "_ = plt.hist(weights_l1, bins=np.linspace(-3,3,30))\n", - "plt.title('L1 - Regularization')\n", - "plt.ylim([0,500])\n", - "\n", - "plt.figure()\n", - "_ = plt.hist(weights_l2, bins=np.linspace(-3,3,30))\n", - "plt.title('L2 - Regularization')\n", - "_=plt.ylim([0,500])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Mv6knhFa5-iJ" - }, - "source": [ - "Both types of regularization squeeze the distribution of weights towards zero. L2 regularization has a greater effect in the tails of the distribution eliminating extreme weights. L1 regularization produces more exactly-zero values, in this case it sets ~200 to zero." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "MWW1TyjaecRh" - ], - "name": "linear.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/r1/tutorials/images/deep_cnn.md b/site/en/r1/tutorials/images/deep_cnn.md index 00a914d8976..885f3907aa7 100644 --- a/site/en/r1/tutorials/images/deep_cnn.md +++ b/site/en/r1/tutorials/images/deep_cnn.md @@ -80,15 +80,15 @@ for details. It consists of 1,068,298 learnable parameters and requires about ## Code Organization The code for this tutorial resides in -[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/). +[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/). 
File | Purpose --- | --- -[`cifar10_input.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_input.py) | Loads CIFAR-10 dataset using [tensorflow-datasets library](https://github.com/tensorflow/datasets). -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model. -[`cifar10_train.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU. -[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs. -[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model. +[`cifar10_input.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_input.py) | Loads CIFAR-10 dataset using [tensorflow-datasets library](https://github.com/tensorflow/datasets). +[`cifar10.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model. +[`cifar10_train.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU. +[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs. +[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model. To run this tutorial, you will need to: @@ -99,7 +99,7 @@ pip install tensorflow-datasets ## CIFAR-10 Model The CIFAR-10 network is largely contained in -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10.py). +[`cifar10.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10.py). The complete training graph contains roughly 765 operations. We find that we can make the code most reusable by constructing the graph with the following modules: @@ -108,7 +108,7 @@ reusable by constructing the graph with the following modules: operations that read and preprocess CIFAR images for evaluation and training, respectively. 1. [**Model prediction:**](#model-prediction) `inference()` -adds operations that perform inference, i.e. classification, on supplied images. +adds operations that perform inference, i.e., classification, on supplied images. 1. [**Model training:**](#model-training) `loss()` and `train()` add operations that compute the loss, gradients, variable updates and visualization summaries. @@ -405,7 +405,7 @@ a "tower". We must set two attributes for each tower: * A unique name for all operations within a tower. `tf.name_scope` provides this unique name by prepending a scope. For instance, all operations in -the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`. +the first tower are prepended with `tower_0`, e.g., `tower_0/conv1/Conv2D`. * A preferred hardware device to run the operation within a tower. 
`tf.device` specifies this. For diff --git a/site/en/r1/tutorials/images/hub_with_keras.ipynb b/site/en/r1/tutorials/images/hub_with_keras.ipynb index ece9c0fa4a9..f4e683e8936 100644 --- a/site/en/r1/tutorials/images/hub_with_keras.ipynb +++ b/site/en/r1/tutorials/images/hub_with_keras.ipynb @@ -60,7 +60,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -841,7 +841,7 @@ "t = time.time()\n", "\n", "export_path = \"/tmp/saved_models/{}\".format(int(t))\n", - "tf.keras.experimental.export_saved_model(model, export_path)\n", + "model.save(export_path)\n", "\n", "export_path" ] @@ -863,7 +863,7 @@ }, "outputs": [], "source": [ - "reloaded = tf.keras.experimental.load_from_saved_model(export_path, custom_objects={'KerasLayer':hub.KerasLayer})" + "reloaded = tf.keras.models.load_model(export_path, custom_objects={'KerasLayer':hub.KerasLayer})" ] }, { diff --git a/site/en/r1/tutorials/images/image_recognition.md b/site/en/r1/tutorials/images/image_recognition.md index 0be884de403..cb66e594629 100644 --- a/site/en/r1/tutorials/images/image_recognition.md +++ b/site/en/r1/tutorials/images/image_recognition.md @@ -140,13 +140,13 @@ score of 0.8. -Next, try it out on your own images by supplying the --image= argument, e.g. +Next, try it out on your own images by supplying the --image= argument, e.g., ```bash bazel-bin/tensorflow/examples/label_image/label_image --image=my_image.png ``` -If you look inside the [`tensorflow/examples/label_image/main.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc) +If you look inside the [`tensorflow/examples/label_image/main.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc) file, you can find out how it works. We hope this code will help you integrate TensorFlow into your own applications, so we will walk step by step through the main functions: @@ -164,7 +164,7 @@ training. If you have a graph that you've trained yourself, you'll just need to adjust the values to match whatever you used during your training process. You can see how they're applied to an image in the -[`ReadTensorFromImageFile()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L88) +[`ReadTensorFromImageFile()`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc#L88) function. ```C++ @@ -334,7 +334,7 @@ The `PrintTopLabels()` function takes those sorted results, and prints them out friendly way. The `CheckTopLabel()` function is very similar, but just makes sure that the top label is the one we expect, for debugging purposes. -At the end, [`main()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L252) +At the end, [`main()`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc#L252) ties together all of these calls. 
```C++ diff --git a/site/en/r1/tutorials/images/transfer_learning.ipynb b/site/en/r1/tutorials/images/transfer_learning.ipynb index bdb05a86382..25779babd17 100644 --- a/site/en/r1/tutorials/images/transfer_learning.ipynb +++ b/site/en/r1/tutorials/images/transfer_learning.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -364,7 +364,7 @@ }, "outputs": [], "source": [ - "model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),\n", + "model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001),\n", " loss='binary_crossentropy',\n", " metrics=['accuracy'])" ] @@ -547,7 +547,7 @@ "\n", "# Freeze all the layers before the `fine_tune_at` layer\n", "for layer in base_model.layers[:fine_tune_at]:\n", - " layer.trainable = False" + " layer.trainable = False" ] }, { @@ -569,7 +569,7 @@ }, "outputs": [], "source": [ - "model.compile(optimizer = tf.keras.optimizers.RMSprop(lr=2e-5),\n", + "model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate=2e-5),\n", " loss='binary_crossentropy',\n", " metrics=['accuracy'])" ] diff --git a/site/en/r1/tutorials/keras/README.md b/site/en/r1/tutorials/keras/README.md index 4da2f72dca9..47aca7e0052 100644 --- a/site/en/r1/tutorials/keras/README.md +++ b/site/en/r1/tutorials/keras/README.md @@ -4,7 +4,7 @@ This notebook collection is inspired by the book *[Deep Learning with Python](https://books.google.com/books?id=Yo3CAQAACAAJ)*. These tutorials use `tf.keras`, TensorFlow's high-level Python API for building and training deep learning models. To learn more about using Keras with -TensorFlow, see the [TensorFlow Keras Guide](../../guide/keras.ipynb). +TensorFlow, see the [TensorFlow Keras Guide](https://www.tensorflow.org/guide/keras). Publisher's note: *Deep Learning with Python* introduces the field of deep learning using the Python language and the powerful Keras library. Written by diff --git a/site/en/r1/tutorials/keras/basic_classification.ipynb b/site/en/r1/tutorials/keras/basic_classification.ipynb index be7f5e9e8b1..14950538ce4 100644 --- a/site/en/r1/tutorials/keras/basic_classification.ipynb +++ b/site/en/r1/tutorials/keras/basic_classification.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/basic_regression.ipynb b/site/en/r1/tutorials/keras/basic_regression.ipynb index 7d9cb711efa..4bffd62f982 100644 --- a/site/en/r1/tutorials/keras/basic_regression.ipynb +++ b/site/en/r1/tutorials/keras/basic_regression.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. 
To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/basic_text_classification.ipynb b/site/en/r1/tutorials/keras/basic_text_classification.ipynb index 0303d54d973..5424185bcbd 100644 --- a/site/en/r1/tutorials/keras/basic_text_classification.ipynb +++ b/site/en/r1/tutorials/keras/basic_text_classification.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb b/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb index a8f266f9869..8e35b06e556 100644 --- a/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb +++ b/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/keras/save_and_restore_models.ipynb b/site/en/r1/tutorials/keras/save_and_restore_models.ipynb index 7911e37e139..04cc94417a9 100644 --- a/site/en/r1/tutorials/keras/save_and_restore_models.ipynb +++ b/site/en/r1/tutorials/keras/save_and_restore_models.ipynb @@ -96,7 +96,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -115,7 +115,7 @@ "\n", "Sharing this data helps others understand how the model works and try it themselves with new data.\n", "\n", - "Caution: Be careful with untrusted code—TensorFlow models are code. See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) for details.\n", + "Caution: Be careful with untrusted code—TensorFlow models are code. See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/r1.15/SECURITY.md) for details.\n", "\n", "### Options\n", "\n", @@ -698,7 +698,7 @@ "id": "B7qfpvpY9HCe" }, "source": [ - "Load the the saved model." + "Load the saved model." ] }, { diff --git a/site/en/r1/tutorials/load_data/images.ipynb b/site/en/r1/tutorials/load_data/images.ipynb index dbee204323b..923b95130d1 100644 --- a/site/en/r1/tutorials/load_data/images.ipynb +++ b/site/en/r1/tutorials/load_data/images.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. 
To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/load_data/tf_records.ipynb b/site/en/r1/tutorials/load_data/tf_records.ipynb index 8b57d3f2f1e..45635034c69 100644 --- a/site/en/r1/tutorials/load_data/tf_records.ipynb +++ b/site/en/r1/tutorials/load_data/tf_records.ipynb @@ -57,7 +57,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -141,7 +141,7 @@ "source": [ "Fundamentally a `tf.Example` is a `{\"string\": tf.train.Feature}` mapping.\n", "\n", - "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these.\n", + "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these.\n", "\n", "1. `tf.train.BytesList` (the following types can be coerced)\n", "\n", @@ -276,7 +276,7 @@ "\n", "1. We create a map (dictionary) from the feature name string to the encoded feature value produced in #1.\n", "\n", - "1. The map produced in #2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L85)." + "1. The map produced in #2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/feature.proto#L85)." ] }, { @@ -365,7 +365,7 @@ "id": "XftzX9CN_uGT" }, "source": [ - "For example, suppose we have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. We can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message." + "For example, suppose we have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. We can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message." 
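(Aside on the `tf.Example` text in the hunk above: the wrapping it describes can be sketched directly with the `tf.train` protos. The observation `[False, 4, bytes('goat'), 0.9876]` comes from the tutorial text; the helper and feature names below are illustrative, not part of the notebook.)

```python
import tensorflow as tf

# Minimal sketch of the wrapping described above: raw values go into
# tf.train.Feature messages, those into a Features map, and the map into
# a tf.train.Example. Helper and key names are illustrative.
def _bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

# The observation [False, 4, b'goat', 0.9876] from the text above.
feature = {
    'feature0': _int64_feature(int(False)),   # bool is coerced to int64
    'feature1': _int64_feature(4),
    'feature2': _bytes_feature(b'goat'),
    'feature3': _float_feature(0.9876),
}
example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
print(example_proto)
```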
] }, { @@ -632,7 +632,7 @@ "source": [ "We can also read the TFRecord file using the `tf.data.TFRecordDataset` class.\n", "\n", - "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/r1/guide/datasets#consuming_tfrecord_data).\n", + "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/guide/data#consuming_tfrecord_data).\n", "\n", "Using `TFRecordDataset`s can be useful for standardizing input data and optimizing performance." ] diff --git a/site/en/r1/tutorials/non-ml/mandelbrot.ipynb b/site/en/r1/tutorials/non-ml/mandelbrot.ipynb index 88177211896..bca8a142be4 100644 --- a/site/en/r1/tutorials/non-ml/mandelbrot.ipynb +++ b/site/en/r1/tutorials/non-ml/mandelbrot.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/non-ml/pdes.ipynb b/site/en/r1/tutorials/non-ml/pdes.ipynb index d2646daa8da..832fa450523 100644 --- a/site/en/r1/tutorials/non-ml/pdes.ipynb +++ b/site/en/r1/tutorials/non-ml/pdes.ipynb @@ -64,7 +64,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." diff --git a/site/en/r1/tutorials/representation/kernel_methods.md b/site/en/r1/tutorials/representation/kernel_methods.md index 67adc4951c6..227fe81d515 100644 --- a/site/en/r1/tutorials/representation/kernel_methods.md +++ b/site/en/r1/tutorials/representation/kernel_methods.md @@ -24,7 +24,7 @@ following sources for an introduction: Currently, TensorFlow supports explicit kernel mappings for dense features only; TensorFlow will provide support for sparse features at a later release. -This tutorial uses [tf.contrib.learn](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn) +This tutorial uses [tf.contrib.learn](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/contrib/learn/python/learn) (TensorFlow's high-level Machine Learning API) Estimators for our ML models. If you are not familiar with this API, The [Estimator guide](../../guide/estimators.md) is a good place to start. We will use the MNIST dataset. The tutorial consists @@ -131,7 +131,7 @@ In addition to experimenting with the (training) batch size and the number of training steps, there are a couple other parameters that can be tuned as well. For instance, you can change the optimization method used to minimize the loss by explicitly selecting another optimizer from the collection of -[available optimizers](https://www.tensorflow.org/code/tensorflow/python/training). +[available optimizers](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/training). 
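(The hunk above notes that you can swap in a different optimizer, and the tutorial's own example just below constructs a LinearClassifier with FTRL. As a rough sketch of such a configuration using the TF1-era estimator and optimizer APIs; the feature column and hyperparameter values are illustrative assumptions, not taken from the tutorial.)

```python
import tensorflow as tf

# Hedged sketch: a linear classifier on MNIST-style inputs that minimizes its
# loss with the Follow-The-Regularized-Leader (FTRL) optimizer instead of the
# default. Feature columns and hyperparameters below are illustrative only.
image_column = tf.feature_column.numeric_column('images', shape=[784])

ftrl = tf.train.FtrlOptimizer(
    learning_rate=5.0,
    l2_regularization_strength=1.0)

estimator = tf.estimator.LinearClassifier(
    feature_columns=[image_column],
    n_classes=10,
    optimizer=ftrl)
```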
As an example, the following code constructs a LinearClassifier estimator that uses the Follow-The-Regularized-Leader (FTRL) optimization strategy with a specific learning rate and L2-regularization. diff --git a/site/en/r1/tutorials/representation/linear.md b/site/en/r1/tutorials/representation/linear.md index 5516672b34a..d996a13bc1f 100644 --- a/site/en/r1/tutorials/representation/linear.md +++ b/site/en/r1/tutorials/representation/linear.md @@ -12,7 +12,7 @@ those tools. It explains: Read this overview to decide whether the Estimator's linear model tools might be useful to you. Then work through the -[Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep) +[Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep) to give it a try. This overview uses code samples from the tutorial, but the tutorial walks through the code in greater detail. @@ -177,7 +177,7 @@ the name of a `FeatureColumn`. Each key's value is a tensor containing the values of that feature for all data instances. See [Premade Estimators](../../guide/premade_estimators.md#input_fn) for a more comprehensive look at input functions, and `input_fn` in the -[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep) +[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep) for an example implementation of an input function. The input function is passed to the `train()` and `evaluate()` calls that @@ -236,4 +236,4 @@ e = tf.estimator.DNNLinearCombinedClassifier( dnn_hidden_units=[100, 50]) ``` For more information, see the -[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep). +[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep). diff --git a/site/en/r1/tutorials/representation/unicode.ipynb b/site/en/r1/tutorials/representation/unicode.ipynb index 6762a483a42..f76977c3c92 100644 --- a/site/en/r1/tutorials/representation/unicode.ipynb +++ b/site/en/r1/tutorials/representation/unicode.ipynb @@ -57,7 +57,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -136,7 +136,7 @@ "id": "jsMPnjb6UDJ1" }, "source": [ - "Note: When using python to construct strings, the handling of unicode differs betweeen v2 and v3. In v2, unicode strings are indicated by the \"u\" prefix, as above. In v3, strings are unicode-encoded by default." + "Note: When using python to construct strings, the handling of unicode differs between v2 and v3. In v2, unicode strings are indicated by the \"u\" prefix, as above. In v3, strings are unicode-encoded by default." ] }, { @@ -425,7 +425,7 @@ "source": [ "### Character substrings\n", "\n", - "Similarly, the `tf.strings.substr` operation accepts the \"`unit`\" parameter, and uses it to determine what kind of offsets the \"`pos`\" and \"`len`\" paremeters contain." 
+ "Similarly, the `tf.strings.substr` operation accepts the \"`unit`\" parameter, and uses it to determine what kind of offsets the \"`pos`\" and \"`len`\" parameters contain." ] }, { @@ -587,7 +587,7 @@ "id": "CapnbShuGU8i" }, "source": [ - "First, we decode the sentences into character codepoints, and find the script identifeir for each character." + "First, we decode the sentences into character codepoints, and find the script identifier for each character." ] }, { diff --git a/site/en/r1/tutorials/representation/word2vec.md b/site/en/r1/tutorials/representation/word2vec.md index f6a27c68f3c..517a5dbc5c5 100644 --- a/site/en/r1/tutorials/representation/word2vec.md +++ b/site/en/r1/tutorials/representation/word2vec.md @@ -36,7 +36,7 @@ like to get your hands dirty with the details. Image and audio processing systems work with rich, high-dimensional datasets encoded as vectors of the individual raw pixel-intensities for image data, or -e.g. power spectral density coefficients for audio data. For tasks like object +e.g., power spectral density coefficients for audio data. For tasks like object or speech recognition we know that all the information required to successfully perform the task is encoded in the data (because humans can perform these tasks from the raw data). However, natural language processing systems traditionally @@ -109,7 +109,7 @@ $$ where \\(\text{score}(w_t, h)\\) computes the compatibility of word \\(w_t\\) with the context \\(h\\) (a dot product is commonly used). We train this model by maximizing its [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function) -on the training set, i.e. by maximizing +on the training set, i.e., by maximizing $$ \begin{align} @@ -176,7 +176,7 @@ As an example, let's consider the dataset We first form a dataset of words and the contexts in which they appear. We could define 'context' in any way that makes sense, and in fact people have looked at syntactic contexts (i.e. the syntactic dependents of the current -target word, see e.g. +target word, see e.g., [Levy et al.](https://levyomer.files.wordpress.com/2014/04/dependency-based-word-embeddings-acl-2014.pdf)), words-to-the-left of the target, words-to-the-right of the target, etc. For now, let's stick to the vanilla definition and define 'context' as the window @@ -204,7 +204,7 @@ where the goal is to predict `the` from `quick`. We select `num_noise` number of noisy (contrastive) examples by drawing from some noise distribution, typically the unigram distribution, \\(P(w)\\). For simplicity let's say `num_noise=1` and we select `sheep` as a noisy example. Next we compute the -loss for this pair of observed and noisy examples, i.e. the objective at time +loss for this pair of observed and noisy examples, i.e., the objective at time step \\(t\\) becomes $$J^{(t)}_\text{NEG} = \log Q_\theta(D=1 | \text{the, quick}) + @@ -212,7 +212,7 @@ $$J^{(t)}_\text{NEG} = \log Q_\theta(D=1 | \text{the, quick}) + The goal is to make an update to the embedding parameters \\(\theta\\) to improve (in this case, maximize) this objective function. We do this by deriving the -gradient of the loss with respect to the embedding parameters \\(\theta\\), i.e. +gradient of the loss with respect to the embedding parameters \\(\theta\\), i.e., \\(\frac{\partial}{\partial \theta} J_\text{NEG}\\) (luckily TensorFlow provides easy helper functions for doing this!). We then perform an update to the embeddings by taking a small step in the direction of the gradient. 
When this @@ -227,7 +227,7 @@ When we inspect these visualizations it becomes apparent that the vectors capture some general, and in fact quite useful, semantic information about words and their relationships to one another. It was very interesting when we first discovered that certain directions in the induced vector space specialize -towards certain semantic relationships, e.g. *male-female*, *verb tense* and +towards certain semantic relationships, e.g., *male-female*, *verb tense* and even *country-capital* relationships between words, as illustrated in the figure below (see also for example [Mikolov et al., 2013](https://www.aclweb.org/anthology/N13-1090)). @@ -327,7 +327,7 @@ for inputs, labels in generate_batch(...): ``` See the full example code in -[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/word2vec/word2vec_basic.py). +[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/tutorials/word2vec/word2vec_basic.py). ## Visualizing the learned embeddings @@ -341,7 +341,7 @@ t-SNE. Et voila! As expected, words that are similar end up clustering nearby each other. For a more heavyweight implementation of word2vec that showcases more of the advanced features of TensorFlow, see the implementation in -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/research/tutorials/embedding/word2vec.py). +[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/embedding/word2vec.py). ## Evaluating embeddings: analogical reasoning @@ -357,7 +357,7 @@ Download the dataset for this task from To see how we do this evaluation, have a look at the `build_eval_graph()` and `eval()` functions in -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/research/tutorials/embedding/word2vec.py). +[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/embedding/word2vec.py). The choice of hyperparameters can strongly influence the accuracy on this task. To achieve state-of-the-art performance on this task requires training over a diff --git a/site/en/r1/tutorials/sequences/audio_recognition.md b/site/en/r1/tutorials/sequences/audio_recognition.md index 8ad71b88a3c..0388514ec92 100644 --- a/site/en/r1/tutorials/sequences/audio_recognition.md +++ b/site/en/r1/tutorials/sequences/audio_recognition.md @@ -159,9 +159,9 @@ accuracy. If the training accuracy increases but the validation doesn't, that's a sign that overfitting is occurring, and your model is only learning things about the training clips, not broader patterns that generalize. -## Tensorboard +## TensorBoard -A good way to visualize how the training is progressing is using Tensorboard. By +A good way to visualize how the training is progressing is using TensorBoard. 
By default, the script saves out events to /tmp/retrain_logs, and you can load these by running: diff --git a/site/en/r1/tutorials/sequences/recurrent.md b/site/en/r1/tutorials/sequences/recurrent.md index 6654795d944..e7c1f8c0b16 100644 --- a/site/en/r1/tutorials/sequences/recurrent.md +++ b/site/en/r1/tutorials/sequences/recurrent.md @@ -2,7 +2,7 @@ ## Introduction -See [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/){:.external} +See [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/) for an introduction to recurrent neural networks and LSTMs. ## Language Modeling diff --git a/site/en/r1/tutorials/sequences/recurrent_quickdraw.md b/site/en/r1/tutorials/sequences/recurrent_quickdraw.md index 435076f629c..d6a85377d17 100644 --- a/site/en/r1/tutorials/sequences/recurrent_quickdraw.md +++ b/site/en/r1/tutorials/sequences/recurrent_quickdraw.md @@ -109,7 +109,7 @@ This download will take a while and download a bit more than 23GB of data. To convert the `ndjson` files to [TFRecord](../../api_guides/python/python_io.md#TFRecords_Format_Details) files containing -[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) +[`tf.train.Example`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto) protos run the following command. ```shell @@ -213,7 +213,7 @@ screen coordinates and normalize the size such that the drawing has unit height. Finally, we compute the differences between consecutive points and store these as a `VarLenFeature` in a -[tensorflow.Example](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) +[tensorflow.Example](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto) under the key `ink`. In addition we store the `class_index` as a single entry `FixedLengthFeature` and the `shape` of the `ink` as a `FixedLengthFeature` of length 2. diff --git a/site/en/r1/tutorials/sequences/text_generation.ipynb b/site/en/r1/tutorials/sequences/text_generation.ipynb index 5911d1c7673..84d942c8bd0 100644 --- a/site/en/r1/tutorials/sequences/text_generation.ipynb +++ b/site/en/r1/tutorials/sequences/text_generation.ipynb @@ -65,7 +65,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -77,9 +77,9 @@ "id": "BwpJ5IffzRG6" }, "source": [ - "This tutorial demonstrates how to generate text using a character-based RNN. We will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n", + "This tutorial demonstrates how to generate text using a character-based RNN. You will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). 
Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n", "\n", - "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware acclerator > GPU*. If running locally make sure TensorFlow version >= 1.11.\n", + "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware accelerator > GPU*. If running locally make sure TensorFlow version >= 1.11.\n", "\n", "This tutorial includes runnable code implemented using [tf.keras](https://www.tensorflow.org/programmers_guide/keras) and [eager execution](https://www.tensorflow.org/programmers_guide/eager). The following is sample output when the model in this tutorial trained for 30 epochs, and started with the string \"Q\":\n", "\n", @@ -98,7 +98,7 @@ "To watch the next way with his father with his face?\n", "\n", "ESCALUS:\n", - "The cause why then we are all resolved more sons.\n", + "The cause why then us all resolved more sons.\n", "\n", "VOLUMNIA:\n", "O, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, it is no sin it should be dead,\n", @@ -248,7 +248,7 @@ "source": [ "### Vectorize the text\n", "\n", - "Before training, we need to map strings to a numerical representation. Create two lookup tables: one mapping characters to numbers, and another for numbers to characters." + "Before training, you need to map strings to a numerical representation. Create two lookup tables: one mapping characters to numbers, and another for numbers to characters." ] }, { @@ -272,7 +272,7 @@ "id": "tZfqhkYCymwX" }, "source": [ - "Now we have an integer representation for each character. Notice that we mapped the character as indexes from 0 to `len(unique)`." + "Now you have an integer representation for each character. Notice that you mapped the character as indexes from 0 to `len(unique)`." ] }, { @@ -316,7 +316,7 @@ "id": "wssHQ1oGymwe" }, "source": [ - "Given a character, or a sequence of characters, what is the most probable next character? This is the task we're training the model to perform. The input to the model will be a sequence of characters, and we train the model to predict the output—the following character at each time step.\n", + "Given a character, or a sequence of characters, what is the most probable next character? This is the task you are training the model to perform. The input to the model will be a sequence of characters, and you train the model to predict the output—the following character at each time step.\n", "\n", "Since RNNs maintain an internal state that depends on the previously seen elements, given all the characters computed until this moment, what is the next character?\n" ] @@ -346,7 +346,7 @@ }, "outputs": [], "source": [ - "# The maximum length sentence we want for a single input in characters\n", + "# The maximum length sentence you want for a single input in characters\n", "seq_length = 100\n", "examples_per_epoch = len(text)//seq_length\n", "\n", @@ -458,7 +458,7 @@ "source": [ "### Create training batches\n", "\n", - "We used `tf.data` to split the text into manageable sequences. But before feeding this data into the model, we need to shuffle the data and pack it into batches." + "You used `tf.data` to split the text into manageable sequences. 
But before feeding this data into the model, you need to shuffle the data and pack it into batches." ] }, { @@ -543,7 +543,7 @@ }, "outputs": [], "source": [ - "if tf.test.is_gpu_available():\n", + "if tf.config.list_physical_devices('GPU'):\n", " rnn = tf.keras.layers.CuDNNGRU\n", "else:\n", " import functools\n", @@ -650,7 +650,7 @@ "id": "uwv0gEkURfx1" }, "source": [ - "To get actual predictions from the model we need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.\n", + "To get actual predictions from the model you need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.\n", "\n", "Note: It is important to _sample_ from this distribution as taking the _argmax_ of the distribution can easily get the model stuck in a loop.\n", "\n", @@ -746,7 +746,7 @@ "source": [ "The standard `tf.keras.losses.sparse_categorical_crossentropy` loss function works in this case because it is applied across the last dimension of the predictions.\n", "\n", - "Because our model returns logits, we need to set the `from_logits` flag.\n" + "Because our model returns logits, you need to set the `from_logits` flag.\n" ] }, { @@ -771,7 +771,7 @@ "id": "jeOXriLcymww" }, "source": [ - "Configure the training procedure using the `tf.keras.Model.compile` method. We'll use `tf.train.AdamOptimizer` with default arguments and the loss function." + "Configure the training procedure using the `tf.keras.Model.compile` method. You'll use `tf.train.AdamOptimizer` with default arguments and the loss function." ] }, { @@ -891,7 +891,7 @@ "\n", "Because of the way the RNN state is passed from timestep to timestep, the model only accepts a fixed batch size once built.\n", "\n", - "To run the model with a different `batch_size`, we need to rebuild the model and restore the weights from the checkpoint.\n" + "To run the model with a different `batch_size`, you need to rebuild the model and restore the weights from the checkpoint.\n" ] }, { @@ -992,7 +992,7 @@ " predictions = predictions / temperature\n", " predicted_id = tf.multinomial(predictions, num_samples=1)[-1,0].numpy()\n", "\n", - " # We pass the predicted word as the next input to the model\n", + " # You pass the predicted word as the next input to the model\n", " # along with the previous hidden state\n", " input_eval = tf.expand_dims([predicted_id], 0)\n", "\n", @@ -1035,11 +1035,11 @@ "\n", "So now that you've seen how to run the model manually let's unpack the training loop, and implement it ourselves. This gives a starting point, for example, to implement _curriculum learning_ to help stabilize the model's open-loop output.\n", "\n", - "We will use `tf.GradientTape` to track the gradients. You can learn more about this approach by reading the [eager execution guide](https://www.tensorflow.org/r1/guide/eager).\n", + "You will use `tf.GradientTape` to track the gradients. You can learn more about this approach by reading the [eager execution guide](https://www.tensorflow.org/r1/guide/eager).\n", "\n", "The procedure works as follows:\n", "\n", - "* First, initialize the RNN state. We do this by calling the `tf.keras.Model.reset_states` method.\n", + "* First, initialize the RNN state. 
You do this by calling the `tf.keras.Model.reset_states` method.\n", "\n", "* Next, iterate over the dataset (batch by batch) and calculate the *predictions* associated with each.\n", "\n", diff --git a/site/en/swift/README.md b/site/en/swift/README.md deleted file mode 100644 index 162a81fa7d3..00000000000 --- a/site/en/swift/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Welcome to the warp zone! - -# Swift for TensorFlow - -These docs are available here: -https://github.com/tensorflow/swift/tree/main/docs/site diff --git a/site/en/tensorboard/README.md b/site/en/tensorboard/README.md deleted file mode 100644 index 7e2126c23d4..00000000000 --- a/site/en/tensorboard/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Welcome to the warp zone! - -# Tensorboard - -These docs are available here: https://github.com/tensorflow/tensorboard/tree/master/docs diff --git a/site/en/tfx/README.md b/site/en/tfx/README.md deleted file mode 100644 index c56ad2dbf01..00000000000 --- a/site/en/tfx/README.md +++ /dev/null @@ -1,10 +0,0 @@ -Welcome to the warp zone! - -# TensorFlow Extended (TFX) - -These docs are available here: - -* Data Validation: https://github.com/tensorflow/data-validation/tree/master/g3doc -* Model Analysis: https://github.com/tensorflow/model-analysis/tree/master/g3doc -* Transform: https://github.com/tensorflow/transform/tree/master/docs -* Serving: https://github.com/tensorflow/serving/tree/master/tensorflow_serving/g3doc diff --git a/site/en/tutorials/_index.yaml b/site/en/tutorials/_index.yaml index e2fc95aff1f..0d09f04c5c7 100644 --- a/site/en/tutorials/_index.yaml +++ b/site/en/tutorials/_index.yaml @@ -16,8 +16,9 @@ landing_page: - description: >

    The TensorFlow tutorials are written as Jupyter notebooks and run - directly in Google Colab—a hosted notebook environment that requires - no setup. Click the Run in Google Colab button. + directly in Google Colab—a hosted notebook environment that requires + no setup. At the top of each tutorial, you'll see a Run in Google Colab button. Click + the button to open the notebook and run the code yourself.

    - classname: devsite-landing-row-100 @@ -84,38 +85,16 @@ landing_page: - classname: devsite-landing-row-100 items: - description: > - - Subscribe to the - TensorFlow blog, - YouTube channel, - and Twitter - for the latest updates. + + Check out these videos for an introduction to machine learning with TensorFlow: - items: - - heading: "Intro to Machine Learning" - path: "https://www.youtube.com/watch?v=KNAWp2S3w94" + - heading: "TensorFlow ML Zero to Hero" + path: "https://www.youtube.com/watch?v=KNAWp2S3w94&list=PLQY2H8rRoyvwWuPiWnuTDBHe7I0fMSsfO" youtube_id: "KNAWp2S3w94?rel=0&show_info=0" - - heading: "TensorFlow 2.0 and Keras" - path: "https://www.youtube.com/watch?v=wGI_VtE9CJM" - youtube_id: "wGI_VtE9CJM?rel=0&show_info=0" - - - classname: devsite-landing-row-cards - items: - - heading: "Looking Back at 2019" - path: https://blog.tensorflow.org/2019/12/looking-back-at-2019.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2019/12/looking-back-at-2019.html - - heading: "TensorFlow 2 is now available" - path: https://blog.tensorflow.org/2019/09/tensorflow-20-is-now-available.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2019/09/tensorflow-20-is-now-available.html - - heading: "Standardizing on Keras: Guidance on High-level APIs in TensorFlow 2" - path: https://blog.tensorflow.org/2018/12/standardizing-on-keras-guidance.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2018/12/standardizing-on-keras-guidance.html + - heading: "Basic Computer Vision with ML" + path: "https://www.youtube.com/watch?v=bemDFpNooA8&list=PLQY2H8rRoyvwWuPiWnuTDBHe7I0fMSsfO" + youtube_id: "bemDFpNooA8?rel=0&show_info=0" - classname: devsite-landing-row-100 items: @@ -132,8 +111,8 @@ landing_page: - description: > path: /tensorboard icon: @@ -243,7 +222,7 @@ landing_page: path: /xla icon: @@ -295,3 +274,13 @@ landing_page: icon_name: chevron_right foreground: theme background: grey + + - classname: devsite-landing-row-100 + items: + - description: > + + Subscribe to the + TensorFlow blog, + YouTube channel, + and Twitter + for the latest updates. 
diff --git a/site/en/tutorials/_toc.yaml b/site/en/tutorials/_toc.yaml index 27c1d422823..a3907ffe9a4 100644 --- a/site/en/tutorials/_toc.yaml +++ b/site/en/tutorials/_toc.yaml @@ -35,6 +35,9 @@ toc: section: - title: "Images" path: /tutorials/load_data/images + - title: "Video" + path: /tutorials/load_data/video + status: new - title: "CSV" path: /tutorials/load_data/csv - title: "NumPy" @@ -74,6 +77,12 @@ toc: section: - title: "Distributed training with Keras" path: /tutorials/distribute/keras + - title: "Distributed training with DTensors" + path: /tutorials/distribute/dtensor_ml_tutorial + status: experimental + - title: "Using DTensors with Keras" + path: /tutorials/distribute/dtensor_keras_tutorial + status: experimental - title: "Custom training loops" path: /tutorials/distribute/custom_training - title: "Multi-worker training with Keras" @@ -88,9 +97,14 @@ toc: - title: "Distributed input" path: /tutorials/distribute/input -- title: "Images" +- title: "Vision" style: accordion section: + - title: "Computer vision" + path: /tutorials/images + - title: "KerasCV" + path: https://keras.io/keras_cv/ + status: external - title: "Convolutional Neural Network" path: /tutorials/images/cnn - title: "Image classification" @@ -104,31 +118,27 @@ toc: - title: "Image segmentation" path: /tutorials/images/segmentation - title: "Object detection with TF Hub" - path: https://github.com/tensorflow/hub/blob/master/examples/colab/tf2_object_detection.ipynb + path: /hub/tutorials/tf2_object_detection status: external + - title: "Video classification" + status: new + path: /tutorials/video/video_classification + - title: "Transfer learning with MoViNet" + status: new + path: /tutorials/video/transfer_learning_with_movinet - title: "Text" style: accordion section: - - title: "Word embeddings" - path: /text/guide/word_embeddings - status: external - - title: "Word2Vec" - path: /tutorials/text/word2vec - - title: "Text classification with an RNN" - path: /text/tutorials/text_classification_rnn - status: external - - title: "Classify Text with BERT" - path: /text/tutorials/classify_text_with_bert - status: external - - title: "Solve GLUE tasks using BERT on TPU" - path: /text/tutorials/bert_glue + - title: "Text and natural language processing" + path: /tutorials/text/index + - title: "Get started with KerasNLP" + path: https://keras.io/guides/keras_nlp/getting_started/ status: external - - title: "Neural machine translation with attention" - path: /text/tutorials/nmt_with_attention + - title: "Text and NLP guide" + path: /text status: external - - title: "Image captioning" - path: /tutorials/text/image_captioning + - title: "Audio" style: accordion section: @@ -136,10 +146,8 @@ toc: path: /tutorials/audio/simple_audio - title: "Transfer learning for audio recognition" path: /tutorials/audio/transfer_learning_audio - status: new - title: "Generate music with an RNN" path: /tutorials/audio/music_generation - status: new - title: "Structured data" style: accordion @@ -160,6 +168,9 @@ toc: - title: "Generative" style: accordion section: + - title: "Stable Diffusion" + status: new + path: /tutorials/generative/generate_images_with_stable_diffusion - title: "Neural style transfer" path: /tutorials/generative/style_transfer - title: "DeepDream" @@ -176,6 +187,17 @@ toc: path: /tutorials/generative/autoencoder - title: "Variational Autoencoder" path: /tutorials/generative/cvae + - title: "Lossy data compression" + path: /tutorials/generative/data_compression + +- title: "Model optimization" + style: 
accordion + section: + - title: "Scalable model compression with EPR" + path: /tutorials/optimization/compression + - title: "TensorFlow model optimization" + status: external + path: /model_optimization - title: "Model Understanding" style: accordion @@ -184,9 +206,10 @@ toc: path: /tutorials/interpretability/integrated_gradients - title: "Uncertainty quantification with SNGP" path: /tutorials/understanding/sngp - - title: "Probabalistic regression" + - title: "Probabilistic regression" path: /probability/examples/Probabilistic_Layers_Regression status: external + - title: "Reinforcement learning" style: accordion section: @@ -198,15 +221,12 @@ toc: - title: "tf.Estimator" style: accordion + status: deprecated section: - title: "Premade estimator" path: /tutorials/estimator/premade - title: "Linear model" path: /tutorials/estimator/linear - - title: "Boosted trees" - path: /tutorials/estimator/boosted_trees - - title: "Boosted trees model understanding" - path: /tutorials/estimator/boosted_trees_model_understanding - title: "Keras model to Estimator" path: /tutorials/estimator/keras_model_to_estimator - title: "Multi-worker training with Estimator" diff --git a/site/en/tutorials/audio/music_generation.ipynb b/site/en/tutorials/audio/music_generation.ipynb index 89802d0447b..e1423ef7cf2 100644 --- a/site/en/tutorials/audio/music_generation.ipynb +++ b/site/en/tutorials/audio/music_generation.ipynb @@ -68,9 +68,9 @@ "id": "hr78EkAY-FFg" }, "source": [ - "This tutorial shows you how to generate musical notes using a simple RNN. You will train a model using a collection of piano MIDI files from the [MAESTRO dataset](https://magenta.tensorflow.org/datasets/maestro). Given a sequence of notes, your model will learn to predict the next note in the sequence. You can generate a longer sequences of notes by calling the model repeatedly.\n", + "This tutorial shows you how to generate musical notes using a simple recurrent neural network (RNN). You will train a model using a collection of piano MIDI files from the [MAESTRO dataset](https://magenta.tensorflow.org/datasets/maestro). Given a sequence of notes, your model will learn to predict the next note in the sequence. You can generate longer sequences of notes by calling the model repeatedly.\n", "\n", - "This tutorial contains complete code to parse and create MIDI files. You can learn more about how RNNs work by visiting [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation)." + "This tutorial contains complete code to parse and create MIDI files. You can learn more about how RNNs work by visiting the [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation) tutorial." ] }, { @@ -145,7 +145,7 @@ "\n", "from IPython import display\n", "from matplotlib import pyplot as plt\n", - "from typing import Dict, List, Optional, Sequence, Tuple" + "from typing import Optional" ] }, { @@ -680,7 +680,7 @@ "id": "xIBLvj-cODWS" }, "source": [ - "Next, create a [tf.data.Dataset](https://www.tensorflow.org/datasets) from the parsed notes." + "Next, create a `tf.data.Dataset` from the parsed notes." ] }, { @@ -713,7 +713,7 @@ "id": "Sj9SXRCjt3I7" }, "source": [ - "You will train the model on batches of sequences of notes. Each example will consist of a sequence of notes as the input features, and next note as the label. In this way, the model will be trained to predict the next note in a sequence. 
You can find a diagram explaining this process (and more details) in [Text classification with an RNN](https://www.tensorflow.org/text/tutorials/text_generation).\n", + "You will train the model on batches of sequences of notes. Each example will consist of a sequence of notes as the input features, and the next note as the label. In this way, the model will be trained to predict the next note in a sequence. You can find a diagram describing this process (and more details) in [Text classification with an RNN](https://www.tensorflow.org/text/tutorials/text_generation).\n", "\n", "You can use the handy [window](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#window) function with size `seq_length` to create the features and labels in this format." ] @@ -857,7 +857,7 @@ "id": "iGQn32q-hdK2" }, "source": [ - "The model will have three outputs, one for each note variable. For `pitch` and `duration`, you will use a custom loss function based on mean squared error that encourages the model to output non-negative values." + "The model will have three outputs, one for each note variable. For `step` and `duration`, you will use a custom loss function based on mean squared error that encourages the model to output non-negative values." ] }, { @@ -1056,7 +1056,7 @@ "source": [ "To use the model to generate notes, you will first need to provide a starting sequence of notes. The function below generates one note from a sequence of notes. \n", "\n", - "For note pitch, it draws a sample from softmax distribution of notes produced by the model, and does not simply pick the note with the highest probability.\n", + "For note pitch, it draws a sample from the softmax distribution of notes produced by the model, and does not simply pick the note with the highest probability.\n", "Always picking the note with the highest probability would lead to repetitive sequences of notes being generated.\n", "\n", "The `temperature` parameter can be used to control the randomness of notes generated. You can find more details on temperature in [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation)." @@ -1072,9 +1072,9 @@ "source": [ "def predict_next_note(\n", " notes: np.ndarray, \n", - " keras_model: tf.keras.Model, \n", - " temperature: float = 1.0) -> int:\n", - " \"\"\"Generates a note IDs using a trained sequence model.\"\"\"\n", + " model: tf.keras.Model, \n", + " temperature: float = 1.0) -> tuple[int, float, float]:\n", + " \"\"\"Generates a note as a tuple of (pitch, step, duration), using a trained sequence model.\"\"\"\n", "\n", " assert temperature > 0\n", "\n", @@ -1229,9 +1229,8 @@ "source": [ "In the above plots, you will notice the change in distribution of the note variables.\n", "Since there is a feedback loop between the model's outputs and inputs, the model tends to generate similar sequences of outputs to reduce the loss. \n", - "This is particularly relevant for `step` and `duration`, which has uses MSE loss.\n", - "For `pitch`, you can increase the randomness by increasing the `temperature` in `predict_next_note`.\n", - "\n" + "This is particularly relevant for `step` and `duration`, which uses the MSE loss.\n", + "For `pitch`, you can increase the randomness by increasing the `temperature` in `predict_next_note`.\n" ] }, { @@ -1244,7 +1243,7 @@ "\n", "This tutorial demonstrated the mechanics of using an RNN to generate sequences of notes from a dataset of MIDI files. 
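(Relating to the `temperature` parameter used by `predict_next_note` in the hunk above: a hedged sketch of temperature-scaled sampling. The tensor name and shape stand in for the model's pitch output and are assumptions for illustration.)

```python
import tensorflow as tf

# `pitch_logits` stands in for the model's pitch output; shape [1, num_pitches]
# is an illustrative assumption.
pitch_logits = tf.random.normal([1, 128])
temperature = 2.0

# Dividing the logits by the temperature flattens (>1.0) or sharpens (<1.0)
# the distribution; sampling rather than taking the argmax avoids the model
# getting stuck in repetitive loops.
scaled_logits = pitch_logits / temperature
pitch = tf.random.categorical(scaled_logits, num_samples=1)
pitch = int(tf.squeeze(pitch).numpy())
print(pitch)
```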
To learn more, you can visit the closely related [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation) tutorial, which contains additional diagrams and explanations. \n", "\n", - "An alternative to using RNNs for music generation is using GANs. Rather than generating audio, a GAN-based approach can generate a entire sequence in parallel. The Magenta team has done impressive work on this approach with [GANSynth](https://magenta.tensorflow.org/gansynth). You can also find many wonderful music and art projects and open-source code on [Magenta project website](https://magenta.tensorflow.org/)." + "One of the alternatives to using RNNs for music generation is using GANs. Rather than generating audio, a GAN-based approach can generate an entire sequence in parallel. The Magenta team has done impressive work on this approach with [GANSynth](https://magenta.tensorflow.org/gansynth). You can also find many wonderful music and art projects and open-source code on [Magenta project website](https://magenta.tensorflow.org/)." ] } ], @@ -1253,7 +1252,6 @@ "colab": { "collapsed_sections": [], "name": "music_generation.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/audio/simple_audio.ipynb b/site/en/tutorials/audio/simple_audio.ipynb index 3d208668d4e..9d79742fbb7 100644 --- a/site/en/tutorials/audio/simple_audio.ipynb +++ b/site/en/tutorials/audio/simple_audio.ipynb @@ -74,7 +74,9 @@ "id": "SPfDNFlb66XF" }, "source": [ - "This tutorial will show you how to build a basic speech recognition network that recognizes ten different words. It's important to know that real speech and audio recognition systems are much more complex, but like MNIST for images, it should give you a basic understanding of the techniques involved. Once you've completed this tutorial, you'll have a model that tries to classify a one second audio clip as \"down\", \"go\", \"left\", \"no\", \"right\", \"stop\", \"up\" and \"yes\"." + "This tutorial demonstrates how to preprocess audio files in the WAV format and build and train a basic [automatic speech recognition](https://en.wikipedia.org/wiki/Speech_recognition) (ASR) model for recognizing ten different words. You will use a portion of the [Speech Commands dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) ([Warden, 2018](https://arxiv.org/abs/1804.03209)), which contains short (one-second or less) audio clips of commands, such as \"down\", \"go\", \"left\", \"no\", \"right\", \"stop\", \"up\" and \"yes\".\n", + "\n", + "Real-world speech and audio recognition [systems](https://ai.googleblog.com/search/label/Speech%20Recognition) are complex. But, like [image classification with the MNIST dataset](../quickstart/beginner.ipynb), this tutorial should give you a basic understanding of the techniques involved." ] }, { @@ -85,7 +87,18 @@ "source": [ "## Setup\n", "\n", - "Import necessary modules and dependencies." + "Import necessary modules and dependencies. You'll be using `tf.keras.utils.audio_dataset_from_directory` (introduced in TensorFlow 2.10), which helps generate audio classification datasets from directories of `.wav` files. You'll also need [seaborn](https://seaborn.pydata.org) for visualization in this tutorial." 
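(Since the setup text above notes that `tf.keras.utils.audio_dataset_from_directory` was introduced in TensorFlow 2.10, a quick version sanity check can avoid a confusing error later; this cell is an illustrative addition, not part of the notebook.)

```python
import tensorflow as tf

# audio_dataset_from_directory requires TF 2.10+ (per the text above).
major, minor = (int(v) for v in tf.__version__.split('.')[:2])
assert (major, minor) >= (2, 10), f"Need TensorFlow 2.10+, found {tf.__version__}"
```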
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hhNW45sjDEDe" + }, + "outputs": [], + "source": [ + "!pip install -U -q tensorflow tensorflow_datasets" ] }, { @@ -104,13 +117,11 @@ "import seaborn as sns\n", "import tensorflow as tf\n", "\n", - "from tensorflow.keras.layers.experimental import preprocessing\n", "from tensorflow.keras import layers\n", "from tensorflow.keras import models\n", "from IPython import display\n", "\n", - "\n", - "# Set seed for experiment reproducibility\n", + "# Set the seed value for experiment reproducibility.\n", "seed = 42\n", "tf.random.set_seed(seed)\n", "np.random.seed(seed)" @@ -122,11 +133,11 @@ "id": "yR0EdgrLCaWR" }, "source": [ - "## Import the Speech Commands dataset\n", + "## Import the mini Speech Commands dataset\n", "\n", - "You'll write a script to download a portion of the [Speech Commands dataset](https://www.tensorflow.org/datasets/catalog/speech_commands). The original dataset consists of over 105,000 WAV audio files of people saying thirty different words. This data was collected by Google and released under a CC BY license.\n", + "To save time with data loading, you will be working with a smaller version of the Speech Commands dataset. The [original dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) consists of over 105,000 audio files in the [WAV (Waveform) audio file format](https://www.aelius.com/njh/wavemetatools/doc/riffmci.pdf) of people saying 35 different words. This data was collected by Google and released under a CC BY license.\n", "\n", - "You'll be using a portion of the dataset to save time with data loading. Extract the `mini_speech_commands.zip` and load it in using the `tf.data` API." + "Download and extract the `mini_speech_commands.zip` file containing the smaller Speech Commands datasets with `tf.keras.utils.get_file`:" ] }, { @@ -137,7 +148,9 @@ }, "outputs": [], "source": [ - "data_dir = pathlib.Path('data/mini_speech_commands')\n", + "DATASET_PATH = 'data/mini_speech_commands'\n", + "\n", + "data_dir = pathlib.Path(DATASET_PATH)\n", "if not data_dir.exists():\n", " tf.keras.utils.get_file(\n", " 'mini_speech_commands.zip',\n", @@ -152,7 +165,7 @@ "id": "BgvFq3uYiS5G" }, "source": [ - "Check basic statistics about the dataset." + "The dataset's audio clips are stored in eight folders corresponding to each speech command: `no`, `yes`, `down`, `go`, `left`, `up`, `right`, and `stop`:" ] }, { @@ -164,178 +177,140 @@ "outputs": [], "source": [ "commands = np.array(tf.io.gfile.listdir(str(data_dir)))\n", - "commands = commands[commands != 'README.md']\n", + "commands = commands[(commands != 'README.md') & (commands != '.DS_Store')]\n", "print('Commands:', commands)" ] }, { "cell_type": "markdown", "metadata": { - "id": "aMvdU9SY8WXN" + "id": "TZ7GJjDvHqtt" }, "source": [ - "Extract the audio files into a list and shuffle it." + "Divided into directories this way, you can easily load the data using `keras.utils.audio_dataset_from_directory`. \n", + "\n", + "The audio clips are 1 second or less at 16kHz. The `output_sequence_length=16000` pads the short ones to exactly 1 second (and would trim longer ones) so that they can be easily batched." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "hlX685l1wD9k" + "id": "mFM4c3aMC8Qv" }, "outputs": [], "source": [ - "filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')\n", - "filenames = tf.random.shuffle(filenames)\n", - "num_samples = len(filenames)\n", - "print('Number of total examples:', num_samples)\n", - "print('Number of examples per label:',\n", - " len(tf.io.gfile.listdir(str(data_dir/commands[0]))))\n", - "print('Example file tensor:', filenames[0])" + "train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(\n", + " directory=data_dir,\n", + " batch_size=64,\n", + " validation_split=0.2,\n", + " seed=0,\n", + " output_sequence_length=16000,\n", + " subset='both')\n", + "\n", + "label_names = np.array(train_ds.class_names)\n", + "print()\n", + "print(\"label names:\", label_names)" ] }, { "cell_type": "markdown", "metadata": { - "id": "9vK3ymy23MCP" + "id": "cestp83qFnU5" }, "source": [ - "Split the files into training, validation and test sets using a 80:10:10 ratio, respectively." + "The dataset now contains batches of audio clips and integer labels. The audio clips have a shape of `(batch, samples, channels)`. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Cv_wts-l3KgD" + "id": "3yU6SQGIFb3H" }, "outputs": [], "source": [ - "train_files = filenames[:6400]\n", - "val_files = filenames[6400: 6400 + 800]\n", - "test_files = filenames[-800:]\n", - "\n", - "print('Training set size', len(train_files))\n", - "print('Validation set size', len(val_files))\n", - "print('Test set size', len(test_files))" + "train_ds.element_spec" ] }, { "cell_type": "markdown", "metadata": { - "id": "g2Cj9FyvfweD" + "id": "ppG9Dgq2Ex8R" }, "source": [ - "## Reading audio files and their labels" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j1zjcWteOcBy" - }, - "source": [ - "The audio file will initially be read as a binary file, which you'll want to convert into a numerical tensor.\n", - "\n", - "To load an audio file, you will use [`tf.audio.decode_wav`](https://www.tensorflow.org/api_docs/python/tf/audio/decode_wav), which returns the WAV-encoded audio as a Tensor and the sample rate.\n", - "\n", - "A WAV file contains time series data with a set number of samples per second. \n", - "Each sample represents the amplitude of the audio signal at that specific time. In a 16-bit system, like the files in `mini_speech_commands`, the values range from -32768 to 32767. \n", - "The sample rate for this dataset is 16kHz.\n", - "Note that `tf.audio.decode_wav` will normalize the values to the range [-1.0, 1.0]." + "This dataset only contains single channel audio, so use the `tf.squeeze` function to drop the extra axis:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "9PjJ2iXYwftD" + "id": "Xl-tnniUIBlM" }, "outputs": [], "source": [ - "def decode_audio(audio_binary):\n", - " audio, _ = tf.audio.decode_wav(audio_binary)\n", - " return tf.squeeze(audio, axis=-1)" + "def squeeze(audio, labels):\n", + " audio = tf.squeeze(audio, axis=-1)\n", + " return audio, labels\n", + "\n", + "train_ds = train_ds.map(squeeze, tf.data.AUTOTUNE)\n", + "val_ds = val_ds.map(squeeze, tf.data.AUTOTUNE)" ] }, { "cell_type": "markdown", "metadata": { - "id": "GPQseZElOjVN" + "id": "DtsCSWZN5ILv" }, "source": [ - "The label for each WAV file is its parent directory." + "The `utils.audio_dataset_from_directory` function only returns up to two splits. 
It's a good idea to keep a test set separate from your validation set.\n", + "Ideally you'd keep it in a separate directory, but in this case you can use `Dataset.shard` to split the validation set into two halves. Note that iterating over **any** shard will load **all** the data, and only keep its fraction. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "8VTtX1nr3YT-" + "id": "u5UEGsqM5Gss" }, "outputs": [], "source": [ - "def get_label(file_path):\n", - " parts = tf.strings.split(file_path, os.path.sep)\n", - "\n", - " # Note: You'll use indexing here instead of tuple unpacking to enable this \n", - " # to work in a TensorFlow graph.\n", - " return parts[-2] " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E8Y9w_5MOsr-" - }, - "source": [ - "Let's define a method that will take in the filename of the WAV file and output a tuple containing the audio and labels for supervised training." + "test_ds = val_ds.shard(num_shards=2, index=0)\n", + "val_ds = val_ds.shard(num_shards=2, index=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "WdgUD5T93NyT" + "id": "xIeoJcwJH5h9" }, "outputs": [], "source": [ - "def get_waveform_and_label(file_path):\n", - " label = get_label(file_path)\n", - " audio_binary = tf.io.read_file(file_path)\n", - " waveform = decode_audio(audio_binary)\n", - " return waveform, label" + "for example_audio, example_labels in train_ds.take(1): \n", + " print(example_audio.shape)\n", + " print(example_labels.shape)" ] }, { "cell_type": "markdown", "metadata": { - "id": "nvN8W_dDjYjc" + "id": "voxGEwvuh2L7" }, "source": [ - "You will now apply `process_path` to build your training set to extract the audio-label pairs and check the results. You'll build the validation and test sets using a similar procedure later on." + "Let's plot a few audio waveforms:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "0SQl8yXl3kNP" + "id": "dYtGq2zYNHuT" }, "outputs": [], "source": [ - "AUTOTUNE = tf.data.AUTOTUNE\n", - "files_ds = tf.data.Dataset.from_tensor_slices(train_files)\n", - "waveform_ds = files_ds.map(get_waveform_and_label, num_parallel_calls=AUTOTUNE)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "voxGEwvuh2L7" - }, - "source": [ - "Let's examine a few audio waveforms with their corresponding labels." + "label_names[[1,1,3,0]]" ] }, { @@ -346,20 +321,17 @@ }, "outputs": [], "source": [ + "plt.figure(figsize=(16, 10))\n", "rows = 3\n", "cols = 3\n", - "n = rows*cols\n", - "fig, axes = plt.subplots(rows, cols, figsize=(10, 12))\n", - "for i, (audio, label) in enumerate(waveform_ds.take(n)):\n", - " r = i // cols\n", - " c = i % cols\n", - " ax = axes[r][c]\n", - " ax.plot(audio.numpy())\n", - " ax.set_yticks(np.arange(-1.2, 1.2, 0.2))\n", - " label = label.numpy().decode('utf-8')\n", - " ax.set_title(label)\n", - "\n", - "plt.show()" + "n = rows * cols\n", + "for i in range(n):\n", + " plt.subplot(rows, cols, i+1)\n", + " audio_signal = example_audio[i]\n", + " plt.plot(audio_signal)\n", + " plt.title(label_names[example_labels[i]])\n", + " plt.yticks(np.arange(-1.2, 1.2, 0.2))\n", + " plt.ylim([-1.1, 1.1])" ] }, { @@ -368,17 +340,17 @@ "id": "EWXPphxm0B4m" }, "source": [ - "## Spectrogram\n", + "## Convert waveforms to spectrograms\n", "\n", - "You'll convert the waveform into a spectrogram, which shows frequency changes over time and can be represented as a 2D image. 
This can be done by applying the short-time Fourier transform (STFT) to convert the audio into the time-frequency domain.\n", + "The waveforms in the dataset are represented in the time domain. Next, you'll transform the waveforms from the time-domain signals into the time-frequency-domain signals by computing the [short-time Fourier transform (STFT)](https://en.wikipedia.org/wiki/Short-time_Fourier_transform) to convert the waveforms to as [spectrograms](https://en.wikipedia.org/wiki/Spectrogram), which show frequency changes over time and can be represented as 2D images. You will feed the spectrogram images into your neural network to train the model.\n", "\n", - "A Fourier transform ([`tf.signal.fft`](https://www.tensorflow.org/api_docs/python/tf/signal/fft)) converts a signal to its component frequencies, but loses all time information. The STFT ([`tf.signal.stft`](https://www.tensorflow.org/api_docs/python/tf/signal/stft)) splits the signal into windows of time and runs a Fourier transform on each window, preserving some time information, and returning a 2D tensor that you can run standard convolutions on.\n", + "A Fourier transform (`tf.signal.fft`) converts a signal to its component frequencies, but loses all time information. In comparison, STFT (`tf.signal.stft`) splits the signal into windows of time and runs a Fourier transform on each window, preserving some time information, and returning a 2D tensor that you can run standard convolutions on.\n", "\n", - "STFT produces an array of complex numbers representing magnitude and phase. However, you'll only need the magnitude for this tutorial, which can be derived by applying `tf.abs` on the output of `tf.signal.stft`. \n", + "Create a utility function for converting waveforms to spectrograms:\n", "\n", - "Choose `frame_length` and `frame_step` parameters such that the generated spectrogram \"image\" is almost square. For more information on STFT parameters choice, you can refer to [this video](https://www.coursera.org/lecture/audio-signal-processing/stft-2-tjEQe) on audio signal processing. \n", - "\n", - "You also want the waveforms to have the same length, so that when you convert it to a spectrogram image, the results will have similar dimensions. This can be done by simply zero padding the audio clips that are shorter than one second.\n" + "- The waveforms need to be of the same length, so that when you convert them to spectrograms, the results have similar dimensions. This can be done by simply zero-padding the audio clips that are shorter than one second (using `tf.zeros`).\n", + "- When calling `tf.signal.stft`, choose the `frame_length` and `frame_step` parameters such that the generated spectrogram \"image\" is almost square. For more information on the STFT parameters choice, refer to [this Coursera video](https://www.coursera.org/lecture/audio-signal-processing/stft-2-tjEQe) on audio signal processing and STFT.\n", + "- The STFT produces an array of complex numbers representing magnitude and phase. However, in this tutorial you'll only use the magnitude, which you can derive by applying `tf.abs` on the output of `tf.signal.stft`." 
] }, { @@ -390,18 +362,15 @@ "outputs": [], "source": [ "def get_spectrogram(waveform):\n", - " # Padding for files with less than 16000 samples\n", - " zero_padding = tf.zeros([16000] - tf.shape(waveform), dtype=tf.float32)\n", - "\n", - " # Concatenate audio with padding so that all audio clips will be of the \n", - " # same length\n", - " waveform = tf.cast(waveform, tf.float32)\n", - " equal_length = tf.concat([waveform, zero_padding], 0)\n", + " # Convert the waveform to a spectrogram via a STFT.\n", " spectrogram = tf.signal.stft(\n", - " equal_length, frame_length=255, frame_step=128)\n", - " \n", + " waveform, frame_length=255, frame_step=128)\n", + " # Obtain the magnitude of the STFT.\n", " spectrogram = tf.abs(spectrogram)\n", - "\n", + " # Add a `channels` dimension, so that the spectrogram can be used\n", + " # as image-like input data with convolution layers (which expect\n", + " # shape (`batch_size`, `height`, `width`, `channels`).\n", + " spectrogram = spectrogram[..., tf.newaxis]\n", " return spectrogram" ] }, @@ -411,7 +380,7 @@ "id": "5rdPiPYJphs2" }, "source": [ - "Next, you will explore the data. Compare the waveform, the spectrogram and the actual audio of one example from the dataset." + "Next, start exploring the data. Print the shapes of one example's tensorized waveform and the corresponding spectrogram, and play the original audio:" ] }, { @@ -422,15 +391,25 @@ }, "outputs": [], "source": [ - "for waveform, label in waveform_ds.take(1):\n", - " label = label.numpy().decode('utf-8')\n", + "for i in range(3):\n", + " label = label_names[example_labels[i]]\n", + " waveform = example_audio[i]\n", " spectrogram = get_spectrogram(waveform)\n", "\n", - "print('Label:', label)\n", - "print('Waveform shape:', waveform.shape)\n", - "print('Spectrogram shape:', spectrogram.shape)\n", - "print('Audio playback')\n", - "display.display(display.Audio(waveform, rate=16000))" + " print('Label:', label)\n", + " print('Waveform shape:', waveform.shape)\n", + " print('Spectrogram shape:', spectrogram.shape)\n", + " print('Audio playback')\n", + " display.display(display.Audio(waveform, rate=16000))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xnSuqyxJ1isF" + }, + "source": [ + "Now, define a function for displaying a spectrogram:" ] }, { @@ -442,154 +421,136 @@ "outputs": [], "source": [ "def plot_spectrogram(spectrogram, ax):\n", - " # Convert to frequencies to log scale and transpose so that the time is\n", - " # represented in the x-axis (columns). 
An epsilon is added to avoid log of zero.\n", - " log_spec = np.log(spectrogram.T+np.finfo(float).eps)\n", + " if len(spectrogram.shape) > 2:\n", + " assert len(spectrogram.shape) == 3\n", + " spectrogram = np.squeeze(spectrogram, axis=-1)\n", + " # Convert the frequencies to log scale and transpose, so that the time is\n", + " # represented on the x-axis (columns).\n", + " # Add an epsilon to avoid taking a log of zero.\n", + " log_spec = np.log(spectrogram.T + np.finfo(float).eps)\n", " height = log_spec.shape[0]\n", " width = log_spec.shape[1]\n", " X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)\n", " Y = range(height)\n", - " ax.pcolormesh(X, Y, log_spec)\n", - "\n", - "\n", - "fig, axes = plt.subplots(2, figsize=(12, 8))\n", - "timescale = np.arange(waveform.shape[0])\n", - "axes[0].plot(timescale, waveform.numpy())\n", - "axes[0].set_title('Waveform')\n", - "axes[0].set_xlim([0, 16000])\n", - "plot_spectrogram(spectrogram.numpy(), axes[1])\n", - "axes[1].set_title('Spectrogram')\n", - "plt.show()" + " ax.pcolormesh(X, Y, log_spec)" ] }, { "cell_type": "markdown", "metadata": { - "id": "GyYXjW07jCHA" + "id": "baa5c91e8603" }, "source": [ - "Now transform the waveform dataset to have spectrogram images and their corresponding labels as integer IDs." + "Plot the example's waveform over time and the corresponding spectrogram (frequencies over time):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "43IS2IouEV40" + "id": "d2_CikgY1tjv" }, "outputs": [], "source": [ - "def get_spectrogram_and_label_id(audio, label):\n", - " spectrogram = get_spectrogram(audio)\n", - " spectrogram = tf.expand_dims(spectrogram, -1)\n", - " label_id = tf.argmax(label == commands)\n", - " return spectrogram, label_id" + "fig, axes = plt.subplots(2, figsize=(12, 8))\n", + "timescale = np.arange(waveform.shape[0])\n", + "axes[0].plot(timescale, waveform.numpy())\n", + "axes[0].set_title('Waveform')\n", + "axes[0].set_xlim([0, 16000])\n", + "\n", + "plot_spectrogram(spectrogram.numpy(), axes[1])\n", + "axes[1].set_title('Spectrogram')\n", + "plt.suptitle(label.title())\n", + "plt.show()" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "yEVb_oK0oBLQ" + "id": "GyYXjW07jCHA" }, - "outputs": [], "source": [ - "spectrogram_ds = waveform_ds.map(\n", - " get_spectrogram_and_label_id, num_parallel_calls=AUTOTUNE)" + "Now, create spectrogram datasets from the audio datasets:" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "6gQpAAgMnyDi" + "id": "mAD0LpkgqtQo" }, + "outputs": [], "source": [ - "Examine the spectrogram \"images\" for different samples of the dataset." 
+ "def make_spec_ds(ds):\n", + " return ds.map(\n", + " map_func=lambda audio,label: (get_spectrogram(audio), label),\n", + " num_parallel_calls=tf.data.AUTOTUNE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "QUbHfTuon4iF" + "id": "yEVb_oK0oBLQ" }, "outputs": [], "source": [ - "rows = 3\n", - "cols = 3\n", - "n = rows*cols\n", - "fig, axes = plt.subplots(rows, cols, figsize=(10, 10))\n", - "for i, (spectrogram, label_id) in enumerate(spectrogram_ds.take(n)):\n", - " r = i // cols\n", - " c = i % cols\n", - " ax = axes[r][c]\n", - " plot_spectrogram(np.squeeze(spectrogram.numpy()), ax)\n", - " ax.set_title(commands[label_id.numpy()])\n", - " ax.axis('off')\n", - " \n", - "plt.show()" + "train_spectrogram_ds = make_spec_ds(train_ds)\n", + "val_spectrogram_ds = make_spec_ds(val_ds)\n", + "test_spectrogram_ds = make_spec_ds(test_ds)" ] }, { "cell_type": "markdown", "metadata": { - "id": "z5KdY8IF8rkt" + "id": "6gQpAAgMnyDi" }, "source": [ - "## Build and train the model\n", - "\n", - "Now you can build and train your model. But before you do that, you'll need to repeat the training set preprocessing on the validation and test sets." + "Examine the spectrograms for different examples of the dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "10UI32QH_45b" + "id": "EaM2q5aGis-d" }, "outputs": [], "source": [ - "def preprocess_dataset(files):\n", - " files_ds = tf.data.Dataset.from_tensor_slices(files)\n", - " output_ds = files_ds.map(get_waveform_and_label, num_parallel_calls=AUTOTUNE)\n", - " output_ds = output_ds.map(\n", - " get_spectrogram_and_label_id, num_parallel_calls=AUTOTUNE)\n", - " return output_ds" + "for example_spectrograms, example_spect_labels in train_spectrogram_ds.take(1):\n", + " break" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "HNv4xwYkB2P6" + "id": "QUbHfTuon4iF" }, "outputs": [], "source": [ - "train_ds = spectrogram_ds\n", - "val_ds = preprocess_dataset(val_files)\n", - "test_ds = preprocess_dataset(test_files)" + "rows = 3\n", + "cols = 3\n", + "n = rows*cols\n", + "fig, axes = plt.subplots(rows, cols, figsize=(16, 9))\n", + "\n", + "for i in range(n):\n", + " r = i // cols\n", + " c = i % cols\n", + " ax = axes[r][c]\n", + " plot_spectrogram(example_spectrograms[i].numpy(), ax)\n", + " ax.set_title(label_names[example_spect_labels[i].numpy()])\n", + "\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": { - "id": "assnWo6SB3lR" - }, - "source": [ - "Batch the training and validation sets for model training." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UgY9WYzn61EX" + "id": "z5KdY8IF8rkt" }, - "outputs": [], "source": [ - "batch_size = 64\n", - "train_ds = train_ds.batch(batch_size)\n", - "val_ds = val_ds.batch(batch_size)" + "## Build and train the model" ] }, { @@ -598,7 +559,7 @@ "id": "GS1uIh6F_TN9" }, "source": [ - "Add dataset [`cache()`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#cache) and [`prefetch()`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#prefetch) operations to reduce read latency while training the model." 
+ "Add `Dataset.cache` and `Dataset.prefetch` operations to reduce read latency while training the model:" ] }, { @@ -609,8 +570,9 @@ }, "outputs": [], "source": [ - "train_ds = train_ds.cache().prefetch(AUTOTUNE)\n", - "val_ds = val_ds.cache().prefetch(AUTOTUNE)" + "train_spectrogram_ds = train_spectrogram_ds.cache().shuffle(10000).prefetch(tf.data.AUTOTUNE)\n", + "val_spectrogram_ds = val_spectrogram_ds.cache().prefetch(tf.data.AUTOTUNE)\n", + "test_spectrogram_ds = test_spectrogram_ds.cache().prefetch(tf.data.AUTOTUNE)" ] }, { @@ -620,11 +582,13 @@ }, "source": [ "For the model, you'll use a simple convolutional neural network (CNN), since you have transformed the audio files into spectrogram images.\n", - "The model also has the following additional preprocessing layers:\n", - "- A [`Resizing`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing/Resizing) layer to downsample the input to enable the model to train faster.\n", - "- A [`Normalization`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing/Normalization) layer to normalize each pixel in the image based on its mean and standard deviation.\n", "\n", - "For the `Normalization` layer, its `adapt` method would first need to be called on the training data in order to compute aggregate statistics (i.e. mean and standard deviation)." + "Your `tf.keras.Sequential` model will use the following Keras preprocessing layers:\n", + "\n", + "- `tf.keras.layers.Resizing`: to downsample the input to enable the model to train faster.\n", + "- `tf.keras.layers.Normalization`: to normalize each pixel in the image based on its mean and standard deviation.\n", + "\n", + "For the `Normalization` layer, its `adapt` method would first need to be called on the training data in order to compute aggregate statistics (that is, the mean and the standard deviation)." 
] }, { @@ -635,17 +599,21 @@ }, "outputs": [], "source": [ - "for spectrogram, _ in spectrogram_ds.take(1):\n", - " input_shape = spectrogram.shape\n", + "input_shape = example_spectrograms.shape[1:]\n", "print('Input shape:', input_shape)\n", - "num_labels = len(commands)\n", + "num_labels = len(label_names)\n", "\n", - "norm_layer = preprocessing.Normalization()\n", - "norm_layer.adapt(spectrogram_ds.map(lambda x, _: x))\n", + "# Instantiate the `tf.keras.layers.Normalization` layer.\n", + "norm_layer = layers.Normalization()\n", + "# Fit the state of the layer to the spectrograms\n", + "# with `Normalization.adapt`.\n", + "norm_layer.adapt(data=train_spectrogram_ds.map(map_func=lambda spec, label: spec))\n", "\n", "model = models.Sequential([\n", " layers.Input(shape=input_shape),\n", - " preprocessing.Resizing(32, 32), \n", + " # Downsample the input.\n", + " layers.Resizing(32, 32),\n", + " # Normalize.\n", " norm_layer,\n", " layers.Conv2D(32, 3, activation='relu'),\n", " layers.Conv2D(64, 3, activation='relu'),\n", @@ -660,6 +628,15 @@ "model.summary()" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "de52e5afa2f3" + }, + "source": [ + "Configure the Keras model with the Adam optimizer and the cross-entropy loss:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -675,6 +652,15 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "f42b9e3a4705" + }, + "source": [ + "Train the model over 10 epochs for demonstration purposes:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -685,8 +671,8 @@ "source": [ "EPOCHS = 10\n", "history = model.fit(\n", - " train_ds, \n", - " validation_data=val_ds, \n", + " train_spectrogram_ds,\n", + " validation_data=val_spectrogram_ds,\n", " epochs=EPOCHS,\n", " callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2),\n", ")" @@ -698,7 +684,7 @@ "id": "gjpCDeQ4mUfS" }, "source": [ - "Let's check the training and validation loss curves to see how your model has improved during training." + "Let's plot the training and validation loss curves to check how your model has improved during training:" ] }, { @@ -710,9 +696,20 @@ "outputs": [], "source": [ "metrics = history.history\n", + "plt.figure(figsize=(16,6))\n", + "plt.subplot(1,2,1)\n", "plt.plot(history.epoch, metrics['loss'], metrics['val_loss'])\n", "plt.legend(['loss', 'val_loss'])\n", - "plt.show()" + "plt.ylim([0, max(plt.ylim())])\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss [CrossEntropy]')\n", + "\n", + "plt.subplot(1,2,2)\n", + "plt.plot(history.epoch, 100*np.array(metrics['accuracy']), 100*np.array(metrics['val_accuracy']))\n", + "plt.legend(['accuracy', 'val_accuracy'])\n", + "plt.ylim([0, 100])\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Accuracy [%]')" ] }, { @@ -721,54 +718,64 @@ "id": "5ZTt3kO3mfm4" }, "source": [ - "## Evaluate test set performance\n", + "## Evaluate the model performance\n", "\n", - "Let's run the model on the test set and check performance." 
+ "Run the model on the test set and check the model's performance:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "biU2MwzyAo8o" + "id": "FapuRT_SsWGQ" }, "outputs": [], "source": [ - "test_audio = []\n", - "test_labels = []\n", - "\n", - "for audio, label in test_ds:\n", - " test_audio.append(audio.numpy())\n", - " test_labels.append(label.numpy())\n", + "model.evaluate(test_spectrogram_ds, return_dict=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "en9Znt1NOabH" + }, + "source": [ + "### Display a confusion matrix\n", "\n", - "test_audio = np.array(test_audio)\n", - "test_labels = np.array(test_labels)" + "Use a [confusion matrix](https://developers.google.com/machine-learning/glossary#confusion-matrix) to check how well the model did classifying each of the commands in the test set:\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "ktUanr9mRZky" + "id": "5Y6vmWWQuuT1" }, "outputs": [], "source": [ - "y_pred = np.argmax(model.predict(test_audio), axis=1)\n", - "y_true = test_labels\n", - "\n", - "test_acc = sum(y_pred == y_true) / len(y_true)\n", - "print(f'Test set accuracy: {test_acc:.0%}')" + "y_pred = model.predict(test_spectrogram_ds)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "en9Znt1NOabH" + "id": "d6F0il82u7lW" }, + "outputs": [], "source": [ - "### Display a confusion matrix\n", - "\n", - "A confusion matrix is helpful to see how well the model did on each of the commands in the test set." + "y_pred = tf.argmax(y_pred, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vHSNoBYLvX81" + }, + "outputs": [], + "source": [ + "y_true = tf.concat(list(test_spectrogram_ds.map(lambda s,lab: lab)), axis=0)" ] }, { @@ -779,9 +786,11 @@ }, "outputs": [], "source": [ - "confusion_mtx = tf.math.confusion_matrix(y_true, y_pred) \n", + "confusion_mtx = tf.math.confusion_matrix(y_true, y_pred)\n", "plt.figure(figsize=(10, 8))\n", - "sns.heatmap(confusion_mtx, xticklabels=commands, yticklabels=commands, \n", + "sns.heatmap(confusion_mtx,\n", + " xticklabels=label_names,\n", + " yticklabels=label_names,\n", " annot=True, fmt='g')\n", "plt.xlabel('Prediction')\n", "plt.ylabel('Label')\n", @@ -796,7 +805,7 @@ "source": [ "## Run inference on an audio file\n", "\n", - "Finally, verify the model's prediction output using an input audio file of someone saying \"no.\" How well does your model perform?" + "Finally, verify the model's prediction output using an input audio file of someone saying \"no\". How well does your model perform?" 
] }, { @@ -807,15 +816,21 @@ }, "outputs": [], "source": [ - "sample_file = data_dir/'no/01bb6a2a_nohash_0.wav'\n", + "x = data_dir/'no/01bb6a2a_nohash_0.wav'\n", + "x = tf.io.read_file(str(x))\n", + "x, sample_rate = tf.audio.decode_wav(x, desired_channels=1, desired_samples=16000,)\n", + "x = tf.squeeze(x, axis=-1)\n", + "waveform = x\n", + "x = get_spectrogram(x)\n", + "x = x[tf.newaxis,...]\n", "\n", - "sample_ds = preprocess_dataset([str(sample_file)])\n", + "prediction = model(x)\n", + "x_labels = ['no', 'yes', 'down', 'go', 'left', 'up', 'right', 'stop']\n", + "plt.bar(x_labels, tf.nn.softmax(prediction[0]))\n", + "plt.title('No')\n", + "plt.show()\n", "\n", - "for spectrogram, label in sample_ds.batch(1):\n", - " prediction = model(spectrogram)\n", - " plt.bar(commands, tf.nn.softmax(prediction[0]))\n", - " plt.title(f'Predictions for \"{commands[label[0]]}\"')\n", - " plt.show()" + "display.display(display.Audio(waveform, rate=16000))" ] }, { @@ -824,7 +839,107 @@ "id": "VgWICqdqQNaQ" }, "source": [ - "You can see that your model very clearly recognized the audio command as \"no.\"" + "As the output suggests, your model should have recognized the audio command as \"no\"." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h1icqlM3ISW0" + }, + "source": [ + "## Export the model with preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r7HX-MjgIbji" + }, + "source": [ + "The model's not very easy to use if you have to apply those preprocessing steps before passing data to the model for inference. So build an end-to-end version:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2lIeXdWjIbDE" + }, + "outputs": [], + "source": [ + "class ExportModel(tf.Module):\n", + " def __init__(self, model):\n", + " self.model = model\n", + "\n", + " # Accept either a string-filename or a batch of waveforms.\n", + " # You could add additional signatures for a single wave, or a ragged-batch. \n", + " self.__call__.get_concrete_function(\n", + " x=tf.TensorSpec(shape=(), dtype=tf.string))\n", + " self.__call__.get_concrete_function(\n", + " x=tf.TensorSpec(shape=[None, 16000], dtype=tf.float32))\n", + "\n", + "\n", + " @tf.function\n", + " def __call__(self, x):\n", + " # If they pass a string, load the file and decode it. 
\n", + " if x.dtype == tf.string:\n", + " x = tf.io.read_file(x)\n", + " x, _ = tf.audio.decode_wav(x, desired_channels=1, desired_samples=16000,)\n", + " x = tf.squeeze(x, axis=-1)\n", + " x = x[tf.newaxis, :]\n", + " \n", + " x = get_spectrogram(x) \n", + " result = self.model(x, training=False)\n", + " \n", + " class_ids = tf.argmax(result, axis=-1)\n", + " class_names = tf.gather(label_names, class_ids)\n", + " return {'predictions':result,\n", + " 'class_ids': class_ids,\n", + " 'class_names': class_names}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtZBmUiB9HGY" + }, + "source": [ + "Test run the \"export\" model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z1_8TYaCIRue" + }, + "outputs": [], + "source": [ + "export = ExportModel(model)\n", + "export(tf.constant(str(data_dir/'no/01bb6a2a_nohash_0.wav')))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1J6Iuz829Cxo" + }, + "source": [ + "Save and reload the model, the reloaded model gives identical output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wTAg4vsn3oEb" + }, + "outputs": [], + "source": [ + "tf.saved_model.save(export, \"saved\")\n", + "imported = tf.saved_model.load(\"saved\")\n", + "imported(waveform[tf.newaxis, :])" ] }, { @@ -835,18 +950,20 @@ "source": [ "## Next steps\n", "\n", - "This tutorial showed how you could do simple audio classification using a convolutional neural network with TensorFlow and Python.\n", - "\n", - "* To learn how to use transfer learning for audio classification, check out the [Sound classification with YAMNet](https://www.tensorflow.org/hub/tutorials/yamnet) tutorial.\n", - "\n", - "* To build your own interactive web app for audio classification, consider taking the [TensorFlow.js - Audio recognition using transfer learning codelab](https://codelabs.developers.google.com/codelabs/tensorflowjs-audio-codelab/index.html#0).\n", + "This tutorial demonstrated how to carry out simple audio classification/automatic speech recognition using a convolutional neural network with TensorFlow and Python. To learn more, consider the following resources:\n", "\n", - "* TensorFlow also has additional support for [audio data preparation and augmentation](https://www.tensorflow.org/io/tutorials/audio) to help with your own audio-based projects.\n" + "- The [Sound classification with YAMNet](https://www.tensorflow.org/hub/tutorials/yamnet) tutorial shows how to use transfer learning for audio classification.\n", + "- The notebooks from [Kaggle's TensorFlow speech recognition challenge](https://www.kaggle.com/c/tensorflow-speech-recognition-challenge/overview).\n", + "- The \n", + "[TensorFlow.js - Audio recognition using transfer learning codelab](https://codelabs.developers.google.com/codelabs/tensorflowjs-audio-codelab/index.html#0) teaches how to build your own interactive web app for audio classification.\n", + "- [A tutorial on deep learning for music information retrieval](https://arxiv.org/abs/1709.04396) (Choi et al., 2017) on arXiv.\n", + "- TensorFlow also has additional support for [audio data preparation and augmentation](https://www.tensorflow.org/io/tutorials/audio) to help with your own audio-based projects.\n", + "- Consider using the [librosa](https://librosa.org/) library for music and audio analysis." 
] } ], "metadata": { - "accelerator": "GPU", + "accelerator": "CPU", "colab": { "collapsed_sections": [], "name": "simple_audio.ipynb", diff --git a/site/en/tutorials/audio/transfer_learning_audio.ipynb b/site/en/tutorials/audio/transfer_learning_audio.ipynb index 16c679aed61..160aeeb7103 100644 --- a/site/en/tutorials/audio/transfer_learning_audio.ipynb +++ b/site/en/tutorials/audio/transfer_learning_audio.ipynb @@ -99,7 +99,9 @@ }, "outputs": [], "source": [ - "!pip install tensorflow_io" + "!pip install -q \"tensorflow==2.11.*\"\n", + "# tensorflow_io 0.28 is compatible with TensorFlow 2.11\n", + "!pip install -q \"tensorflow_io==0.28.*\"" ] }, { @@ -235,7 +237,7 @@ "_ = plt.plot(testing_wav_data)\n", "\n", "# Play the audio file.\n", - "display.Audio(testing_wav_data,rate=16000)" + "display.Audio(testing_wav_data, rate=16000)" ] }, { @@ -286,7 +288,7 @@ "source": [ "scores, embeddings, spectrogram = yamnet_model(testing_wav_data)\n", "class_scores = tf.reduce_mean(scores, axis=0)\n", - "top_class = tf.argmax(class_scores)\n", + "top_class = tf.math.argmax(class_scores)\n", "inferred_class = class_names[top_class]\n", "\n", "print(f'The main sound is: {inferred_class}')\n", @@ -736,7 +738,7 @@ "outputs": [], "source": [ "reloaded_results = reloaded_model(testing_wav_data)\n", - "cat_or_dog = my_classes[tf.argmax(reloaded_results)]\n", + "cat_or_dog = my_classes[tf.math.argmax(reloaded_results)]\n", "print(f'The main sound is: {cat_or_dog}')" ] }, @@ -758,7 +760,7 @@ "outputs": [], "source": [ "serving_results = reloaded_model.signatures['serving_default'](testing_wav_data)\n", - "cat_or_dog = my_classes[tf.argmax(serving_results['classifier'])]\n", + "cat_or_dog = my_classes[tf.math.argmax(serving_results['classifier'])]\n", "print(f'The main sound is: {cat_or_dog}')\n" ] }, @@ -805,13 +807,13 @@ "# Run the model, check the output.\n", "scores, embeddings, spectrogram = yamnet_model(waveform)\n", "class_scores = tf.reduce_mean(scores, axis=0)\n", - "top_class = tf.argmax(class_scores)\n", + "top_class = tf.math.argmax(class_scores)\n", "inferred_class = class_names[top_class]\n", "top_score = class_scores[top_class]\n", "print(f'[YAMNet] The main sound is: {inferred_class} ({top_score})')\n", "\n", "reloaded_results = reloaded_model(waveform)\n", - "your_top_class = tf.argmax(reloaded_results)\n", + "your_top_class = tf.math.argmax(reloaded_results)\n", "your_inferred_class = my_classes[your_top_class]\n", "class_probabilities = tf.nn.softmax(reloaded_results, axis=-1)\n", "your_top_score = class_probabilities[your_top_class]\n", diff --git a/site/en/tutorials/customization/basics.ipynb b/site/en/tutorials/customization/basics.ipynb index 314738300e3..2df0840ad5e 100644 --- a/site/en/tutorials/customization/basics.ipynb +++ b/site/en/tutorials/customization/basics.ipynb @@ -70,10 +70,10 @@ "source": [ "This is an introductory TensorFlow tutorial that shows how to:\n", "\n", - "* Import the required package\n", - "* Create and use tensors\n", - "* Use GPU acceleration\n", - "* Demonstrate `tf.data.Dataset`" + "* Import the required package.\n", + "* Create and use tensors.\n", + "* Use GPU acceleration.\n", + "* Build a data pipeline with `tf.data.Dataset`." ] }, { @@ -84,7 +84,7 @@ "source": [ "## Import TensorFlow\n", "\n", - "To get started, import the `tensorflow` module. As of TensorFlow 2, eager execution is turned on by default. This enables a more interactive frontend to TensorFlow, the details of which we will discuss much later." 
+ "To get started, import the `tensorflow` module. As of TensorFlow 2, eager execution is turned on by default. Eager execution enables a more interactive frontend to TensorFlow, which you will later explore in more detail." ] }, { @@ -106,7 +106,7 @@ "source": [ "## Tensors\n", "\n", - "A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `tf.Tensor` objects have a data type and a shape. Additionally, `tf.Tensor`s can reside in accelerator memory (like a GPU). TensorFlow offers a rich library of operations ([tf.add](https://www.tensorflow.org/api_docs/python/tf/add), [tf.matmul](https://www.tensorflow.org/api_docs/python/tf/matmul), [tf.linalg.inv](https://www.tensorflow.org/api_docs/python/tf/linalg/inv) etc.) that consume and produce `tf.Tensor`s. These operations automatically convert native Python types, for example:\n" + "A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `tf.Tensor` objects have a data type and a shape. Additionally, `tf.Tensor`s can reside in accelerator memory (like a GPU). TensorFlow offers a rich library of operations (for example, `tf.math.add`, `tf.linalg.matmul`, and `tf.linalg.inv`) that consume and produce `tf.Tensor`s. These operations automatically convert built-in Python types. For example:\n" ] }, { @@ -118,13 +118,13 @@ }, "outputs": [], "source": [ - "print(tf.add(1, 2))\n", - "print(tf.add([1, 2], [3, 4]))\n", - "print(tf.square(5))\n", - "print(tf.reduce_sum([1, 2, 3]))\n", + "print(tf.math.add(1, 2))\n", + "print(tf.math.add([1, 2], [3, 4]))\n", + "print(tf.math.square(5))\n", + "print(tf.math.reduce_sum([1, 2, 3]))\n", "\n", "# Operator overloading is also supported\n", - "print(tf.square(2) + tf.square(3))" + "print(tf.math.square(2) + tf.math.square(3))" ] }, { @@ -144,7 +144,7 @@ }, "outputs": [], "source": [ - "x = tf.matmul([[1]], [[2, 3]])\n", + "x = tf.linalg.matmul([[1]], [[2, 3]])\n", "print(x)\n", "print(x.shape)\n", "print(x.dtype)" @@ -168,9 +168,9 @@ "id": "Dwi1tdW3JBw6" }, "source": [ - "### NumPy Compatibility\n", + "### NumPy compatibility\n", "\n", - "Converting between a TensorFlow `tf.Tensor`s and a NumPy `ndarray` is easy:\n", + "Converting between a TensorFlow `tf.Tensor` and a NumPy `ndarray` is easy:\n", "\n", "* TensorFlow operations automatically convert NumPy ndarrays to Tensors.\n", "* NumPy operations automatically convert Tensors to NumPy ndarrays.\n", @@ -191,11 +191,11 @@ "ndarray = np.ones([3, 3])\n", "\n", "print(\"TensorFlow operations convert numpy arrays to Tensors automatically\")\n", - "tensor = tf.multiply(ndarray, 42)\n", + "tensor = tf.math.multiply(ndarray, 42)\n", "print(tensor)\n", "\n", "\n", - "print(\"And NumPy operations convert Tensors to numpy arrays automatically\")\n", + "print(\"And NumPy operations convert Tensors to NumPy arrays automatically\")\n", "print(np.add(tensor, 1))\n", "\n", "print(\"The .numpy() method explicitly converts a Tensor to a numpy array\")\n", @@ -210,7 +210,7 @@ "source": [ "## GPU acceleration\n", "\n", - "Many TensorFlow operations are accelerated using the GPU for computation. Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation—copying the tensor between CPU and GPU memory, if necessary. Tensors produced by an operation are typically backed by the memory of the device on which the operation executed, for example:" + "Many TensorFlow operations are accelerated using the GPU for computation. 
Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation—copying the tensor between CPU and GPU memory, if necessary. Tensors produced by an operation are typically backed by the memory of the device on which the operation executed. For example:" ] }, { @@ -237,7 +237,7 @@ "id": "vpgYzgVXW2Ud" }, "source": [ - "### Device Names\n", + "### Device names\n", "\n", "The `Tensor.device` property provides a fully qualified string name of the device hosting the contents of the tensor. This name encodes many details, such as an identifier of the network address of the host on which this program is executing and the device within that host. This is required for distributed execution of a TensorFlow program. The string ends with `GPU:` if the tensor is placed on the `N`-th GPU on the host." ] @@ -248,9 +248,11 @@ "id": "ZWZQCimzuqyP" }, "source": [ - "### Explicit Device Placement\n", + "### Explicit device placement\n", "\n", - "In TensorFlow, *placement* refers to how individual operations are assigned (placed on) a device for execution. As mentioned, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation and copies tensors to that device, if needed. However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager, for example:" + "In TensorFlow, *placement* refers to how individual operations are assigned (placed on) a device for execution. As mentioned, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation and copies tensors to that device, if needed.\n", + "\n", + "However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager. For example:" ] }, { @@ -266,7 +268,7 @@ "def time_matmul(x):\n", " start = time.time()\n", " for loop in range(10):\n", - " tf.matmul(x, x)\n", + " tf.linalg.matmul(x, x)\n", "\n", " result = time.time()-start\n", "\n", @@ -296,7 +298,7 @@ "source": [ "## Datasets\n", "\n", - "This section uses the [`tf.data.Dataset` API](https://www.tensorflow.org/guide/datasets) to build a pipeline for feeding data to your model. The `tf.data.Dataset` API is used to build performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops." + "This section uses the `tf.data.Dataset` API to build a pipeline for feeding data to your model. `tf.data.Dataset` is used to build performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops. (Refer to the [tf.data: Build TensorFlow input pipelines](../../guide/data.ipynb) guide to learn more.)" ] }, { @@ -307,7 +309,7 @@ "source": [ "### Create a source `Dataset`\n", "\n", - "Create a *source* dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices), or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [TensorFlow Dataset guide](https://www.tensorflow.org/guide/datasets#reading_input_data) for more information." 
+ "Create a *source* dataset using one of the factory functions like `tf.data.Dataset.from_tensors`, `tf.data.Dataset.from_tensor_slices`, or using objects that read from files like `tf.data.TextLineDataset` or `tf.data.TFRecordDataset`. Refer to the _Reading input data_ section of the [tf.data: Build TensorFlow input pipelines](../../guide/data.ipynb) guide for more information." ] }, { @@ -341,7 +343,7 @@ "source": [ "### Apply transformations\n", "\n", - "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), and [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) to apply transformations to dataset records." + "Use the transformations functions like `tf.data.Dataset.map`, `tf.data.Dataset.batch`, and `tf.data.Dataset.shuffle` to apply transformations to dataset records." ] }, { @@ -352,7 +354,7 @@ }, "outputs": [], "source": [ - "ds_tensors = ds_tensors.map(tf.square).shuffle(2).batch(2)\n", + "ds_tensors = ds_tensors.map(tf.math.square).shuffle(2).batch(2)\n", "\n", "ds_file = ds_file.batch(2)" ] diff --git a/site/en/tutorials/customization/custom_layers.ipynb b/site/en/tutorials/customization/custom_layers.ipynb index 97c0e8f8ba6..8bfe0a01b09 100644 --- a/site/en/tutorials/customization/custom_layers.ipynb +++ b/site/en/tutorials/customization/custom_layers.ipynb @@ -90,7 +90,7 @@ }, "outputs": [], "source": [ - "print(tf.test.is_gpu_available())" + "print(tf.config.list_physical_devices('GPU'))" ] }, { @@ -103,7 +103,7 @@ "\n", "Most of the time when writing code for machine learning models you want to operate at a higher level of abstraction than individual operations and manipulation of individual variables.\n", "\n", - "Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as a well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers.\n", + "Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers.\n", "\n", "TensorFlow includes the full [Keras](https://keras.io) API in the tf.keras package, and the Keras layers are very useful when building your own models.\n" ] @@ -256,7 +256,7 @@ "\n", "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut. 
Layers can be nested inside other layers.\n", "\n", - "Typically you inherit from `keras.Model` when you need the model methods like: `Model.fit`,`Model.evaluate`, and `Model.save` (see [Custom Keras layers and models](../../guide/keras/custom_layers_and_models.ipynb) for details).\n", + "Typically you inherit from `keras.Model` when you need the model methods like: `Model.fit`,`Model.evaluate`, and `Model.save` (see [Custom Keras layers and models](https://www.tensorflow.org/guide/keras/custom_layers_and_models) for details).\n", "\n", "One other feature provided by `keras.Model` (instead of `keras.layers.Layer`) is that in addition to tracking variables, a `keras.Model` also tracks its internal layers, making them easier to inspect.\n", "\n", diff --git a/site/en/tutorials/customization/custom_training_walkthrough.ipynb b/site/en/tutorials/customization/custom_training_walkthrough.ipynb index 45cc7e8c39d..9a05d864815 100644 --- a/site/en/tutorials/customization/custom_training_walkthrough.ipynb +++ b/site/en/tutorials/customization/custom_training_walkthrough.ipynb @@ -68,81 +68,20 @@ "id": "LDrzLFXE8T1l" }, "source": [ - "This guide uses machine learning to *categorize* Iris flowers by species. It uses TensorFlow to:\n", - "1. Build a model,\n", - "2. Train this model on example data, and\n", - "3. Use the model to make predictions about unknown data.\n", + "This tutorial shows you how to train a machine learning model with a custom training loop to *categorize* penguins by species. In this notebook, you use TensorFlow to accomplish the following:\n", "\n", - "## TensorFlow programming\n", - "\n", - "This guide uses these high-level TensorFlow concepts:\n", + "1. Import a dataset\n", + "2. Build a simple linear model\n", + "3. Train the model\n", + "4. Evaluate the model's effectiveness\n", + "5. Use the trained model to make predictions\n", "\n", - "* Use TensorFlow's default [eager execution](../../guide/eager.ipynb) development environment,\n", - "* Import data with the [Datasets API](../../guide/datasets.ipynb),\n", - "* Build models and layers with TensorFlow's [Keras API](../../guide/keras/overview.ipynb).\n", + "## TensorFlow programming\n", "\n", - "This tutorial is structured like many TensorFlow programs:\n", + "This tutorial demonstrates the following TensorFlow programming tasks:\n", "\n", - "1. Import and parse the dataset.\n", - "2. Select the type of model.\n", - "3. Train the model.\n", - "4. Evaluate the model's effectiveness.\n", - "5. Use the trained model to make predictions." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yNr7H-AIoLOR" - }, - "source": [ - "## Setup program" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1J3AuPBT9gyR" - }, - "source": [ - "### Configure imports\n", - "\n", - "Import TensorFlow and the other required Python modules. By default, TensorFlow uses [eager execution](../../guide/eager.ipynb) to evaluate operations immediately, returning concrete values instead of creating a computational graph that is executed later. If you are used to a REPL or the `python` interactive console, this feels familiar." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jElLULrDhQZR" - }, - "outputs": [], - "source": [ - "import os\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bfV2Dai0Ow2o" - }, - "outputs": [], - "source": [ - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "g4Wzg69bnwK2" - }, - "outputs": [], - "source": [ - "print(\"TensorFlow version: {}\".format(tf.__version__))\n", - "print(\"Eager execution: {}\".format(tf.executing_eagerly()))" + "* Importing data with the [TensorFlow Datasets API](https://www.tensorflow.org/datasets/overview#load_a_dataset)\n", + "* Building models and layers with the [Keras API](https://www.tensorflow.org/guide/keras/)\n" ] }, { @@ -151,293 +90,255 @@ "id": "Zx7wc0LuuxaJ" }, "source": [ - "## The Iris classification problem\n", + "## Penguin classification problem \n", "\n", - "Imagine you are a botanist seeking an automated way to categorize each Iris flower you find. Machine learning provides many algorithms to classify flowers statistically. For instance, a sophisticated machine learning program could classify flowers based on photographs. Our ambitions are more modest—we're going to classify Iris flowers based on the length and width measurements of their [sepals](https://en.wikipedia.org/wiki/Sepal) and [petals](https://en.wikipedia.org/wiki/Petal).\n", + "Imagine you are an ornithologist seeking an automated way to categorize each penguin you find. Machine learning provides many algorithms to classify penguins statistically. For instance, a sophisticated machine learning program could classify penguins based on photographs. The model you build in this tutorial is a little simpler. It classifies penguins based on their body weight, flipper length, and beaks, specifically the length and width measurements of their [culmen](https://en.wikipedia.org/wiki/Beak#Culmen).\n", "\n", - "The Iris genus entails about 300 species, but our program will only classify the following three:\n", + "There are 18 species of penguins, but in this tutorial you will only attempt to classify the following three:\n", "\n", - "* Iris setosa\n", - "* Iris virginica\n", - "* Iris versicolor\n", + "* Chinstrap penguins\n", + "* Gentoo penguins\n", + "* Adélie penguins\n", "\n", "\n", " \n", " \n", "
    \n", - " \"Petal\n", + " \"Illustration\n", "
    \n", - " Figure 1. Iris setosa (by Radomil, CC BY-SA 3.0), Iris versicolor, (by Dlanglois, CC BY-SA 3.0), and Iris virginica (by Frank Mayfield, CC BY-SA 2.0).
     \n", + " Figure 1. Chinstratp, Gentoo, and Adélie penguins (Artwork by @allison_horst, CC BY-SA 2.0).
     \n", "
    \n", "\n", - "Fortunately, someone has already created a [dataset of 120 Iris flowers](https://en.wikipedia.org/wiki/Iris_flower_data_set) with the sepal and petal measurements. This is a classic dataset that is popular for beginner machine learning classification problems." + "Fortunately, a research team has already created and shared a [dataset of 334 penguins](https://allisonhorst.github.io/palmerpenguins/) with body weight, flipper length, beak measurements, and other data. This dataset is also conveniently available as the [penguins](https://www.tensorflow.org/datasets/catalog/penguins) TensorFlow Dataset. " ] }, { "cell_type": "markdown", "metadata": { - "id": "3Px6KAg0Jowz" + "id": "1J3AuPBT9gyR" }, "source": [ - "## Import and parse the training dataset\n", + "## Setup\n", "\n", - "Download the dataset file and convert it into a structure that can be used by this Python program.\n", - "\n", - "### Download the dataset\n", - "\n", - "Download the training dataset file using the `tf.keras.utils.get_file` function. This returns the file path of the downloaded file:" + "Install the `tfds-nightly` package for the penguins dataset. The `tfds-nightly` package is the nightly released version of the TensorFlow Datasets (TFDS). For more information on TFDS, see [TensorFlow Datasets overview](https://www.tensorflow.org/datasets/overview)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "J6c7uEU9rjRM" + "id": "4XXWn1eDZmET" }, "outputs": [], "source": [ - "train_dataset_url = \"https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv\"\n", - "\n", - "train_dataset_fp = tf.keras.utils.get_file(fname=os.path.basename(train_dataset_url),\n", - " origin=train_dataset_url)\n", - "\n", - "print(\"Local copy of the dataset file: {}\".format(train_dataset_fp))" + "!pip install -q tfds-nightly" ] }, { "cell_type": "markdown", "metadata": { - "id": "qnX1-aLors4S" + "id": "DtGeMicKRGzU" }, "source": [ - "### Inspect the data\n", + "Then select **Runtime > Restart Runtime** from the Colab menu to restart the Colab runtime.\n", "\n", - "This dataset, `iris_training.csv`, is a plain text file that stores tabular data formatted as comma-separated values (CSV). Use the `head -n5` command to take a peek at the first five entries:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FQvb_JYdrpPm" - }, - "outputs": [], - "source": [ - "!head -n5 {train_dataset_fp}" + "Do not proceed with the rest of this tutorial without first restarting the runtime." ] }, { "cell_type": "markdown", "metadata": { - "id": "kQhzD6P-uBoq" + "id": "G9onjGZWZbA-" }, "source": [ - "From this view of the dataset, notice the following:\n", - "\n", - "1. The first line is a header containing information about the dataset:\n", - " * There are 120 total examples. Each example has four features and one of three possible label names.\n", - "2. Subsequent rows are data records, one *[example](https://developers.google.com/machine-learning/glossary/#example)* per line, where:\n", - " * The first four fields are *[features](https://developers.google.com/machine-learning/glossary/#feature)*: these are the characteristics of an example. Here, the fields hold float numbers representing flower measurements.\n", - " * The last column is the *[label](https://developers.google.com/machine-learning/glossary/#label)*: this is the value we want to predict. 
For this dataset, it's an integer value of 0, 1, or 2 that corresponds to a flower name.\n", - "\n", - "Let's write that out in code:" + "Import TensorFlow and the other required Python modules. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "9Edhevw7exl6" + "id": "jElLULrDhQZR" }, "outputs": [], "source": [ - "# column order in CSV file\n", - "column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']\n", - "\n", - "feature_names = column_names[:-1]\n", - "label_name = column_names[-1]\n", + "import os\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "import matplotlib.pyplot as plt\n", "\n", - "print(\"Features: {}\".format(feature_names))\n", - "print(\"Label: {}\".format(label_name))" + "print(\"TensorFlow version: {}\".format(tf.__version__))\n", + "print(\"TensorFlow Datasets version: \",tfds.__version__)" ] }, { "cell_type": "markdown", "metadata": { - "id": "CCtwLoJhhDNc" + "id": "3Px6KAg0Jowz" }, "source": [ - "Each label is associated with string name (for example, \"setosa\"), but machine learning typically relies on numeric values. The label numbers are mapped to a named representation, such as:\n", - "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica\n", + "## Import the dataset\n", "\n", - "For more information about features and labels, see the [ML Terminology section of the Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/framing/ml-terminology)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sVNlJlUOhkoX" - }, - "outputs": [], - "source": [ - "class_names = ['Iris setosa', 'Iris versicolor', 'Iris virginica']" + "The default [penguins/processed](https://www.tensorflow.org/datasets/catalog/penguins) TensorFlow Dataset is already cleaned, normalized, and ready for building a model. Before you download the processed data, preview a simplified version to get familiar with the original penguin survey data.\n" ] }, { "cell_type": "markdown", "metadata": { - "id": "dqPkQExM2Pwt" + "id": "qnX1-aLors4S" }, "source": [ - "### Create a `tf.data.Dataset`\n", + "### Preview the data\n", "\n", - "TensorFlow's [Dataset API](../../guide/data.ipynb) handles many common cases for loading data into a model. This is a high-level API for reading data and transforming it into a form used for training.\n", - "\n", - "\n", - "Since the dataset is a CSV-formatted text file, use the `tf.data.experimental.make_csv_dataset` function to parse the data into a suitable format. Since this function generates data for training models, the default behavior is to shuffle the data (`shuffle=True, shuffle_buffer_size=10000`), and repeat the dataset forever (`num_epochs=None`). We also set the [batch_size](https://developers.google.com/machine-learning/glossary/#batch_size) parameter:" + "Download the simplified version of the penguins dataset (`penguins/simple`) using the TensorFlow Datasets [`tfds.load`](https://www.tensorflow.org/datasets/api_docs/python/tfds/load) method. There are 344 data records in this dataset. 
Extract the first five records into a [`DataFrame`](https://www.tensorflow.org/datasets/api_docs/python/tfds/as_dataframe) object to inspect a sample of the values in this dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "WsxHnz1ebJ2S" + "id": "FQvb_JYdrpPm" }, "outputs": [], "source": [ - "batch_size = 32\n", - "\n", - "train_dataset = tf.data.experimental.make_csv_dataset(\n", - " train_dataset_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name=label_name,\n", - " num_epochs=1)" + "ds_preview, info = tfds.load('penguins/simple', split='train', with_info=True)\n", + "df = tfds.as_dataframe(ds_preview.take(5), info)\n", + "print(df)\n", + "print(info.features)" ] }, { "cell_type": "markdown", "metadata": { - "id": "gB_RSn62c-3G" + "id": "kQhzD6P-uBoq" }, "source": [ - "The `make_csv_dataset` function returns a `tf.data.Dataset` of `(features, label)` pairs, where `features` is a dictionary: `{'feature_name': value}`\n", - "\n", - "These `Dataset` objects are iterable. Let's look at a batch of features:" + "The numbered rows are data records, one _[example](https://developers.google.com/machine-learning/glossary/#example)_ per line, where:\n", + " * The first six fields are _[features](https://developers.google.com/machine-learning/glossary/#feature)_: these are the characteristics of an example. Here, the fields hold numbers representing penguin measurements.\n", + " * The last column is the _[label](https://developers.google.com/machine-learning/glossary/#label)_: this is the value you want to predict. For this dataset, it's an integer value of 0, 1, or 2 that corresponds to a penguin species name." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "iDuG94H-C122" + "id": "CCtwLoJhhDNc" }, - "outputs": [], "source": [ - "features, labels = next(iter(train_dataset))\n", + "In the dataset, the label for the penguin species is represented as a number to make it easier to work with in the model you are building. These numbers correspond to the following penguin species:\n", + "\n", + "* `0`: Adélie penguin\n", + "* `1`: Chinstrap penguin\n", + "* `2`: Gentoo penguin\n", "\n", - "print(features)" + "Create a list containing the penguin species names in this order. You will use this list to interpret the output of the classification model:" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "E63mArnQaAGz" + "id": "sVNlJlUOhkoX" }, + "outputs": [], "source": [ - "Notice that like-features are grouped together, or *batched*. Each example row's fields are appended to the corresponding feature array. Change the `batch_size` to set the number of examples stored in these feature arrays.\n", - "\n", - "You can start to see some clusters by plotting a few features from the batch:" + "class_names = ['Adélie', 'Chinstrap', 'Gentoo']" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "id": "me5Wn-9FcyyO" + "id": "iav9kEgxpY0s" }, - "outputs": [], "source": [ - "plt.scatter(features['petal_length'],\n", - " features['sepal_length'],\n", - " c=labels,\n", - " cmap='viridis')\n", - "\n", - "plt.xlabel(\"Petal length\")\n", - "plt.ylabel(\"Sepal length\")\n", - "plt.show()" + "For more information about features and labels, refer to the [ML Terminology section of the Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/framing/ml-terminology)." 
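As a quick sanity check (a sketch that assumes the `ds_preview` dataset from the preview cell and that the label field is named `species`), you can map an integer label back to its species name:

```python
# Sketch: decode the first record's integer label into a species name.
for example in ds_preview.take(1):
  species_id = int(example['species'].numpy())
  print(species_id, '->', class_names[species_id])
```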
] }, { "cell_type": "markdown", "metadata": { - "id": "YlxpSyHlhT6M" + "id": "PD33PxSmCrtL" }, "source": [ - "To simplify the model building step, create a function to repackage the features dictionary into a single array with shape: `(batch_size, num_features)`.\n", + "### Download the preprocessed dataset\n", "\n", - "This function uses the `tf.stack` method which takes values from a list of tensors and creates a combined tensor at the specified dimension:" + "Now, download the preprocessed penguins dataset (`penguins/processed`) with the `tfds.load` method, which returns a list of `tf.data.Dataset` objects. Note that the `penguins/processed` dataset doesn't come with its own test set, so use an 80:20 split to [slice the full dataset](https://www.tensorflow.org/datasets/splits) into the training and test sets. You will use the test dataset later to verify your model." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "jm932WINcaGU" + "id": "EVV96zIYYAi8" }, "outputs": [], "source": [ - "def pack_features_vector(features, labels):\n", - " \"\"\"Pack the features into a single array.\"\"\"\n", - " features = tf.stack(list(features.values()), axis=1)\n", - " return features, labels" + "ds_split, info = tfds.load(\"penguins/processed\", split=['train[:20%]', 'train[20%:]'], as_supervised=True, with_info=True)\n", + "\n", + "ds_test = ds_split[0]\n", + "ds_train = ds_split[1]\n", + "assert isinstance(ds_test, tf.data.Dataset)\n", + "\n", + "print(info.features)\n", + "df_test = tfds.as_dataframe(ds_test.take(5), info)\n", + "print(\"Test dataset sample: \")\n", + "print(df_test)\n", + "\n", + "df_train = tfds.as_dataframe(ds_train.take(5), info)\n", + "print(\"Train dataset sample: \")\n", + "print(df_train)\n", + "\n", + "ds_train_batch = ds_train.batch(32)" ] }, { "cell_type": "markdown", "metadata": { - "id": "V1Vuph_eDl8x" + "id": "xX2NfLyQOK1y" }, "source": [ - "Then use the `tf.data.Dataset#map` method to pack the `features` of each `(features,label)` pair into the training dataset:" + "Notice that this version of the dataset has been processed by reducing the data down to four normalized features and a species label. In this format, the data can be quickly used to train a model without further processing." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "ZbDkzGZIkpXf" + "id": "iDuG94H-C122" }, "outputs": [], "source": [ - "train_dataset = train_dataset.map(pack_features_vector)" + "features, labels = next(iter(ds_train_batch))\n", + "\n", + "print(features)\n", + "print(labels)" ] }, { "cell_type": "markdown", "metadata": { - "id": "NLy0Q1xCldVO" + "id": "E63mArnQaAGz" }, "source": [ - "The features element of the `Dataset` are now arrays with shape `(batch_size, num_features)`. 
Let's look at the first few examples:" + "You can visualize some clusters by plotting a few features from the batch:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "kex9ibEek6Tr" + "id": "me5Wn-9FcyyO" }, "outputs": [], "source": [ - "features, labels = next(iter(train_dataset))\n", + "plt.scatter(features[:,0],\n", + " features[:,2],\n", + " c=labels,\n", + " cmap='viridis')\n", "\n", - "print(features[:5])" + "plt.xlabel(\"Body Mass\")\n", + "plt.ylabel(\"Culmen Length\")\n", + "plt.show()" ] }, { @@ -446,29 +347,31 @@ "id": "LsaVrtNM3Tx5" }, "source": [ - "## Select the type of model\n", + "## Build a simple linear model\n", "\n", "### Why model?\n", "\n", - "A *[model](https://developers.google.com/machine-learning/crash-course/glossary#model)* is a relationship between features and the label. For the Iris classification problem, the model defines the relationship between the sepal and petal measurements and the predicted Iris species. Some simple models can be described with a few lines of algebra, but complex machine learning models have a large number of parameters that are difficult to summarize.\n", + "A *[model](https://developers.google.com/machine-learning/crash-course/glossary#model)* is a relationship between features and the label. For the penguin classification problem, the model defines the relationship between the body mass, flipper and culmen measurements and the predicted penguin species. Some simple models can be described with a few lines of algebra, but complex machine learning models have a large number of parameters that are difficult to summarize.\n", "\n", - "Could you determine the relationship between the four features and the Iris species *without* using machine learning? That is, could you use traditional programming techniques (for example, a lot of conditional statements) to create a model? Perhaps—if you analyzed the dataset long enough to determine the relationships between petal and sepal measurements to a particular species. And this becomes difficult—maybe impossible—on more complicated datasets. A good machine learning approach *determines the model for you*. If you feed enough representative examples into the right machine learning model type, the program will figure out the relationships for you.\n", + "Could you determine the relationship between the four features and the penguin species *without* using machine learning? That is, could you use traditional programming techniques (for example, a lot of conditional statements) to create a model? Perhaps—if you analyzed the dataset long enough to determine the relationships between body mass and culmen measurements to a particular species. And this becomes difficult—maybe impossible—on more complicated datasets. A good machine learning approach *determines the model for you*. If you feed enough representative examples into the right machine learning model type, the program figures out the relationships for you.\n", "\n", "### Select the model\n", "\n", - "We need to select the kind of model to train. There are many types of models and picking a good one takes experience. This tutorial uses a neural network to solve the Iris classification problem. *[Neural networks](https://developers.google.com/machine-learning/glossary/#neural_network)* can find complex relationships between features and the label. It is a highly-structured graph, organized into one or more *[hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer)*. 
Each hidden layer consists of one or more *[neurons](https://developers.google.com/machine-learning/glossary/#neuron)*. There are several categories of neural networks and this program uses a dense, or *[fully-connected neural network](https://developers.google.com/machine-learning/glossary/#fully_connected_layer)*: the neurons in one layer receive input connections from *every* neuron in the previous layer. For example, Figure 2 illustrates a dense neural network consisting of an input layer, two hidden layers, and an output layer:\n", + "Next you need to select the kind of model to train. There are many types of models and picking a good one takes experience. This tutorial uses a neural network to solve the penguin classification problem. [*Neural networks*](https://developers.google.com/machine-learning/glossary/#neural_network) can find complex relationships between features and the label. It is a highly-structured graph, organized into one or more [*hidden layers*](https://developers.google.com/machine-learning/glossary/#hidden_layer). Each hidden layer consists of one or more [*neurons*](https://developers.google.com/machine-learning/glossary/#neuron). There are several categories of neural networks and this program uses a dense, or [*fully-connected neural network*](https://developers.google.com/machine-learning/glossary/#fully_connected_layer): the neurons in one layer receive input connections from *every* neuron in the previous layer. For example, Figure 2 illustrates a dense neural network consisting of an input layer, two hidden layers, and an output layer:\n", + "\n", + "\n", "\n", "\n", " \n", " \n", "
    \n", - " \n", + " \n", "
    \n", " Figure 2. A neural network with features, hidden layers, and predictions.
     \n", "
    \n", "\n", - "When the model from Figure 2 is trained and fed an unlabeled example, it yields three predictions: the likelihood that this flower is the given Iris species. This prediction is called *[inference](https://developers.google.com/machine-learning/crash-course/glossary#inference)*. For this example, the sum of the output predictions is 1.0. In Figure 2, this prediction breaks down as: `0.02` for *Iris setosa*, `0.95` for *Iris versicolor*, and `0.03` for *Iris virginica*. This means that the model predicts—with 95% probability—that an unlabeled example flower is an *Iris versicolor*." + "When you train the model from Figure 2 and feed it an unlabeled example, it yields three predictions: the likelihood that this penguin is the given penguin species. This prediction is called [*inference*](https://developers.google.com/machine-learning/crash-course/glossary#inference). For this example, the sum of the output predictions is 1.0. In Figure 2, this prediction breaks down as: `0.02` for *Adelie*, `0.95` for *Chinstrap*, and `0.03` for *Gentoo* species. This means that the model predicts—with 95% probability—that an unlabeled example penguin is a *Chinstrap* penguin." ] }, { @@ -481,7 +384,7 @@ "\n", "The TensorFlow `tf.keras` API is the preferred way to create models and layers. This makes it easy to build models and experiment while Keras handles the complexity of connecting everything together.\n", "\n", - "The `tf.keras.Sequential` model is a linear stack of layers. Its constructor takes a list of layer instances, in this case, two `tf.keras.layers.Dense` layers with 10 nodes each, and an output layer with 3 nodes representing our label predictions. The first layer's `input_shape` parameter corresponds to the number of features from the dataset, and is required:" + "The `tf.keras.Sequential` model is a linear stack of layers. Its constructor takes a list of layer instances, in this case, two `tf.keras.layers.Dense` layers with 10 nodes each, and an output layer with 3 nodes representing your label predictions. The first layer's `input_shape` parameter corresponds to the number of features from the dataset, and is required:" ] }, { @@ -505,7 +408,7 @@ "id": "FHcbEzMpxbHL" }, "source": [ - "The *[activation function](https://developers.google.com/machine-learning/crash-course/glossary#activation_function)* determines the output shape of each node in the layer. These non-linearities are important—without them the model would be equivalent to a single layer. There are many `tf.keras.activations`, but [ReLU](https://developers.google.com/machine-learning/crash-course/glossary#ReLU) is common for hidden layers.\n", + "The [*activation function*](https://developers.google.com/machine-learning/crash-course/glossary#activation_function) determines the output shape of each node in the layer. These non-linearities are important—without them the model would be equivalent to a single layer. There are many `tf.keras.activations`, but [ReLU](https://developers.google.com/machine-learning/crash-course/glossary#ReLU) is common for hidden layers.\n", "\n", "The ideal number of hidden layers and neurons depends on the problem and the dataset. Like many aspects of machine learning, picking the best shape of the neural network requires a mixture of knowledge and experimentation. As a rule of thumb, increasing the number of hidden layers and neurons typically creates a more powerful model, which requires more data to train effectively." 
] @@ -516,7 +419,7 @@ "id": "2wFKnhWCpDSS" }, "source": [ - "### Using the model\n", + "### Use the model\n", "\n", "Let's have a quick look at what this model does to a batch of features:" ] @@ -561,7 +464,7 @@ "id": "uRZmchElo481" }, "source": [ - "Taking the `tf.argmax` across classes gives us the predicted class index. But, the model hasn't been trained yet, so these aren't good predictions:" + "Taking the `tf.math.argmax` across classes gives us the predicted class index. But, the model hasn't been trained yet, so these aren't good predictions:" ] }, { @@ -572,7 +475,7 @@ }, "outputs": [], "source": [ - "print(\"Prediction: {}\".format(tf.argmax(predictions, axis=1)))\n", + "print(\"Prediction: {}\".format(tf.math.argmax(predictions, axis=1)))\n", "print(\" Labels: {}\".format(labels))" ] }, @@ -584,9 +487,9 @@ "source": [ "## Train the model\n", "\n", - "*[Training](https://developers.google.com/machine-learning/crash-course/glossary#training)* is the stage of machine learning when the model is gradually optimized, or the model *learns* the dataset. The goal is to learn enough about the structure of the training dataset to make predictions about unseen data. If you learn *too much* about the training dataset, then the predictions only work for the data it has seen and will not be generalizable. This problem is called *[overfitting](https://developers.google.com/machine-learning/crash-course/glossary#overfitting)*—it's like memorizing the answers instead of understanding how to solve a problem.\n", + "[*Training*](https://developers.google.com/machine-learning/crash-course/glossary#training) is the stage of machine learning when the model is gradually optimized, or the model *learns* the dataset. The goal is to learn enough about the structure of the training dataset to make predictions about unseen data. If you learn *too much* about the training dataset, then the predictions only work for the data it has seen and will not be generalizable. This problem is called [*overfitting*](https://developers.google.com/machine-learning/crash-course/glossary#overfitting)—it's like memorizing the answers instead of understanding how to solve a problem.\n", "\n", - "The Iris classification problem is an example of *[supervised machine learning](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning)*: the model is trained from examples that contain labels. In *[unsupervised machine learning](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning)*, the examples don't contain labels. Instead, the model typically finds patterns among the features." + "The penguin classification problem is an example of [*supervised machine learning*](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning): the model is trained from examples that contain labels. In [*unsupervised machine learning*](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning), the examples don't contain labels. Instead, the model typically finds patterns among the features." ] }, { @@ -595,11 +498,11 @@ "id": "RaKp8aEjKX6B" }, "source": [ - "### Define the loss and gradient function\n", + "### Define the loss and gradients function\n", "\n", - "Both training and evaluation stages need to calculate the model's *[loss](https://developers.google.com/machine-learning/crash-course/glossary#loss)*. This measures how off a model's predictions are from the desired label, in other words, how bad the model is performing. 
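As a minimal sketch of the architecture described above, assuming the four input features and three species classes used in this tutorial (the notebook's own model cell, which is unchanged in this diff, may differ in detail):

```python
import tensorflow as tf

# Two hidden Dense layers with 10 nodes each and ReLU activations,
# plus an output layer with 3 nodes (one logit per penguin species).
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(4,)),  # 4 input features
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(3)
])

model.summary()
```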
We want to minimize, or optimize, this value.\n", + "Both training and evaluation stages need to calculate the model's [*loss*](https://developers.google.com/machine-learning/crash-course/glossary#loss). This measures how off a model's predictions are from the desired label, in other words, how bad the model is performing. You want to minimize, or optimize, this value.\n", "\n", - "Our model will calculate its loss using the `tf.keras.losses.SparseCategoricalCrossentropy` function which takes the model's class probability predictions and the desired label, and returns the average loss across the examples." + "Your model will calculate its loss using the `tf.keras.losses.SparseCategoricalCrossentropy` function which takes the model's class probability predictions and the desired label, and returns the average loss across the examples." ] }, { @@ -628,7 +531,6 @@ "\n", " return loss_object(y_true=y, y_pred=y_)\n", "\n", - "\n", "l = loss(model, features, labels, training=False)\n", "print(\"Loss test: {}\".format(l))" ] @@ -639,7 +541,7 @@ "id": "3IcPqA24QM6B" }, "source": [ - "Use the `tf.GradientTape` context to calculate the *[gradients](https://developers.google.com/machine-learning/crash-course/glossary#gradient)* used to optimize your model:" + "Use the `tf.GradientTape` context to calculate the [*gradients*](https://developers.google.com/machine-learning/crash-course/glossary#gradient) used to optimize your model:" ] }, { @@ -664,7 +566,7 @@ "source": [ "### Create an optimizer\n", "\n", - "An *[optimizer](https://developers.google.com/machine-learning/crash-course/glossary#optimizer)* applies the computed gradients to the model's variables to minimize the `loss` function. You can think of the loss function as a curved surface (see Figure 3) and we want to find its lowest point by walking around. The gradients point in the direction of steepest ascent—so we'll travel the opposite way and move down the hill. By iteratively calculating the loss and gradient for each batch, we'll adjust the model during training. Gradually, the model will find the best combination of weights and bias to minimize loss. And the lower the loss, the better the model's predictions.\n", + "An [*optimizer*](https://developers.google.com/machine-learning/crash-course/glossary#optimizer) applies the computed gradients to the model's parameters to minimize the `loss` function. You can think of the loss function as a curved surface (refer to Figure 3) and you want to find its lowest point by walking around. The gradients point in the direction of steepest ascent—so you'll travel the opposite way and move down the hill. By iteratively calculating the loss and gradient for each batch, you'll adjust the model during training. Gradually, the model will find the best combination of weights and bias to minimize the loss. And the lower the loss, the better the model's predictions.\n", "\n", "\n", " \n", "
    \n", @@ -676,7 +578,7 @@ "
    \n", "\n", - "TensorFlow has many optimization algorithms available for training. This model uses the `tf.keras.optimizers.SGD` that implements the *[stochastic gradient descent](https://developers.google.com/machine-learning/crash-course/glossary#gradient_descent)* (SGD) algorithm. The `learning_rate` sets the step size to take for each iteration down the hill. This is a *hyperparameter* that you'll commonly adjust to achieve better results." + "TensorFlow has many optimization algorithms available for training. In this tutorial, you will use the `tf.keras.optimizers.SGD` that implements the [*stochastic gradient descent*](https://developers.google.com/machine-learning/crash-course/glossary#gradient_descent) (SGD) algorithm. The `learning_rate` parameter sets the step size to take for each iteration down the hill. This rate is a [*hyperparameter*](https://developers.google.com/machine-learning/glossary/#hyperparameter) that you'll commonly adjust to achieve better results." ] }, { @@ -685,7 +587,7 @@ "id": "XkUd6UiZa_dF" }, "source": [ - "Let's setup the optimizer:" + "Instantiate the optimizer with a [*learning rate*](https://developers.google.com/machine-learning/glossary#learning-rate) of `0.01`, a scalar value that is multiplied by the gradient at each iteration of the training:" ] }, { @@ -705,7 +607,7 @@ "id": "pJVRZ0hP52ZB" }, "source": [ - "We'll use this to calculate a single optimization step:" + "Then use this object to calculate a single optimization step:" ] }, { @@ -740,11 +642,11 @@ "1. Iterate each *epoch*. An epoch is one pass through the dataset.\n", "2. Within an epoch, iterate over each example in the training `Dataset` grabbing its *features* (`x`) and *label* (`y`).\n", "3. Using the example's features, make a prediction and compare it with the label. Measure the inaccuracy of the prediction and use that to calculate the model's loss and gradients.\n", - "4. Use an `optimizer` to update the model's variables.\n", + "4. Use an `optimizer` to update the model's parameters.\n", "5. Keep track of some stats for visualization.\n", "6. Repeat for each epoch.\n", "\n", - "The `num_epochs` variable is the number of times to loop over the dataset collection. Counter-intuitively, training a model longer does not guarantee a better model. `num_epochs` is a *[hyperparameter](https://developers.google.com/machine-learning/glossary/#hyperparameter)* that you can tune. Choosing the right number usually requires both experience and experimentation:" + "The `num_epochs` variable is the number of times to loop over the dataset collection. In the code below, `num_epochs` is set to 201 which means this training loop will run 201 times. Counter-intuitively, training a model longer does not guarantee a better model. `num_epochs` is a [*hyperparameter*](https://developers.google.com/machine-learning/glossary/#hyperparameter) that you can tune. 
Choosing the right number usually requires both experience and experimentation:" ] }, { @@ -755,7 +657,7 @@ }, "outputs": [], "source": [ - "## Note: Rerunning this cell uses the same model variables\n", + "## Note: Rerunning this cell uses the same model parameters\n", "\n", "# Keep results for plotting\n", "train_loss_results = []\n", @@ -768,7 +670,7 @@ " epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()\n", "\n", " # Training loop - using batches of 32\n", - " for x, y in train_dataset:\n", + " for x, y in ds_train_batch:\n", " # Optimize the model\n", " loss_value, grads = grad(model, x, y)\n", " optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", @@ -790,6 +692,15 @@ " epoch_accuracy.result()))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "Diep-ROEuKyl" + }, + "source": [ + "Alternatively, you could use the built-in Keras [`Model.fit(ds_train_batch)`](https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit) method to train your model. " + ] + }, { "cell_type": "markdown", "metadata": { @@ -805,9 +716,9 @@ "id": "j3wdbmtLVTyr" }, "source": [ - "While it's helpful to print out the model's training progress, it's often *more* helpful to see this progress. [TensorBoard](https://www.tensorflow.org/tensorboard) is a nice visualization tool that is packaged with TensorFlow, but we can create basic charts using the `matplotlib` module.\n", + "While it's helpful to print out the model's training progress, you can visualize the progress with [TensorBoard](https://www.tensorflow.org/tensorboard) - a visualization and metrics tool that is packaged with TensorFlow. For this simple example, you will create basic charts using the `matplotlib` module.\n", "\n", - "Interpreting these charts takes some experience, but you really want to see the *loss* go down and the *accuracy* go up:" + "Interpreting these charts takes some experience, but in general you want to see the *loss* decrease and the *accuracy* increase:" ] }, { @@ -838,9 +749,9 @@ "source": [ "## Evaluate the model's effectiveness\n", "\n", - "Now that the model is trained, we can get some statistics on its performance.\n", + "Now that the model is trained, you can get some statistics on its performance.\n", "\n", - "*Evaluating* means determining how effectively the model makes predictions. To determine the model's effectiveness at Iris classification, pass some sepal and petal measurements to the model and ask the model to predict what Iris species they represent. Then compare the model's predictions against the actual label. For example, a model that picked the correct species on half the input examples has an *[accuracy](https://developers.google.com/machine-learning/glossary/#accuracy)* of `0.5`. Figure 4 shows a slightly more effective model, getting 4 out of 5 predictions correct at 80% accuracy:\n", + "*Evaluating* means determining how effectively the model makes predictions. To determine the model's effectiveness at penguin classification, pass some measurements to the model and ask the model to predict what penguin species they represent. Then compare the model's predictions against the actual label. For example, a model that picked the correct species on half the input examples has an [*accuracy*](https://developers.google.com/machine-learning/glossary/#accuracy) of `0.5`. Figure 4 shows a slightly more effective model, getting 4 out of 5 predictions correct at 80% accuracy:\n", "\n", "\n", " \n", @@ -869,7 +780,7 @@ " \n", " \n", " \n", "
    5.52.54.01.311
    \n", - " Figure 4. An Iris classifier that is 80% accurate.
     \n", + " Figure 4. A penguin classifier that is 80% accurate.
     \n", "
    " ] @@ -880,44 +791,11 @@ "id": "z-EvK7hGL0d8" }, "source": [ - "### Setup the test dataset\n", + "### Set up the test set\n", "\n", "Evaluating the model is similar to training the model. The biggest difference is the examples come from a separate *[test set](https://developers.google.com/machine-learning/crash-course/glossary#test_set)* rather than the training set. To fairly assess a model's effectiveness, the examples used to evaluate a model must be different from the examples used to train the model.\n", "\n", - "The setup for the test `Dataset` is similar to the setup for training `Dataset`. Download the CSV text file and parse that values, then give it a little shuffle:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ps3_9dJ3Lodk" - }, - "outputs": [], - "source": [ - "test_url = \"https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv\"\n", - "\n", - "test_fp = tf.keras.utils.get_file(fname=os.path.basename(test_url),\n", - " origin=test_url)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SRMWCu30bnxH" - }, - "outputs": [], - "source": [ - "test_dataset = tf.data.experimental.make_csv_dataset(\n", - " test_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name='species',\n", - " num_epochs=1,\n", - " shuffle=False)\n", - "\n", - "test_dataset = test_dataset.map(pack_features_vector)" + "The penguin dataset doesn't have a separate test dataset so in the previous Download the dataset section, you split the original dataset into test and train datasets. Use the `ds_test_batch` dataset for the evaluation." ] }, { @@ -928,7 +806,7 @@ "source": [ "### Evaluate the model on the test dataset\n", "\n", - "Unlike the training stage, the model only evaluates a single [epoch](https://developers.google.com/machine-learning/glossary/#epoch) of the test data. In the following code cell, we iterate over each example in the test set and compare the model's prediction against the actual label. This is used to measure the model's accuracy across the entire test set:" + "Unlike the training stage, the model only evaluates a single [epoch](https://developers.google.com/machine-learning/glossary/#epoch) of the test data. The following code iterates over each example in the test set and compare the model's prediction against the actual label. This comparison is used to measure the model's accuracy across the entire test set:" ] }, { @@ -940,24 +818,34 @@ "outputs": [], "source": [ "test_accuracy = tf.keras.metrics.Accuracy()\n", + "ds_test_batch = ds_test.batch(10)\n", "\n", - "for (x, y) in test_dataset:\n", + "for (x, y) in ds_test_batch:\n", " # training=False is needed only if there are layers with different\n", " # behavior during training versus inference (e.g. Dropout).\n", " logits = model(x, training=False)\n", - " prediction = tf.argmax(logits, axis=1, output_type=tf.int32)\n", + " prediction = tf.math.argmax(logits, axis=1, output_type=tf.int64)\n", " test_accuracy(prediction, y)\n", "\n", "print(\"Test set accuracy: {:.3%}\".format(test_accuracy.result()))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fel8ql2qzGlK" + }, + "source": [ + "You can also use the `model.evaluate(ds_test, return_dict=True)` keras function to get accuracy information on your test dataset. 
" + ] + }, { "cell_type": "markdown", "metadata": { "id": "HcKEZMtCOeK-" }, "source": [ - "We can see on the last batch, for example, the model is usually correct:" + "By inspecting the last batch, for example, you can observe that the model predictions are usually correct.\n" ] }, { @@ -979,13 +867,13 @@ "source": [ "## Use the trained model to make predictions\n", "\n", - "We've trained a model and \"proven\" that it's good—but not perfect—at classifying Iris species. Now let's use the trained model to make some predictions on [unlabeled examples](https://developers.google.com/machine-learning/glossary/#unlabeled_example); that is, on examples that contain features but not a label.\n", + "You've trained a model and \"proven\" that it's good—but not perfect—at classifying penguin species. Now let's use the trained model to make some predictions on [*unlabeled examples*](https://developers.google.com/machine-learning/glossary/#unlabeled_example); that is, on examples that contain features but not labels.\n", "\n", - "In real-life, the unlabeled examples could come from lots of different sources including apps, CSV files, and data feeds. For now, we're going to manually provide three unlabeled examples to predict their labels. Recall, the label numbers are mapped to a named representation as:\n", + "In real-life, the unlabeled examples could come from lots of different sources including apps, CSV files, and data feeds. For this tutorial, manually provide three unlabeled examples to predict their labels. Recall, the label numbers are mapped to a named representation as:\n", "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica" + "* `0`: Adélie penguin\n", + "* `1`: Chinstrap penguin\n", + "* `2`: Gentoo penguin" ] }, { @@ -997,9 +885,9 @@ "outputs": [], "source": [ "predict_dataset = tf.convert_to_tensor([\n", - " [5.1, 3.3, 1.7, 0.5,],\n", - " [5.9, 3.0, 4.2, 1.5,],\n", - " [6.9, 3.1, 5.4, 2.1]\n", + " [0.3, 0.8, 0.4, 0.5,],\n", + " [0.4, 0.1, 0.8, 0.5,],\n", + " [0.7, 0.9, 0.8, 0.4]\n", "])\n", "\n", "# training=False is needed only if there are layers with different\n", @@ -1007,7 +895,7 @@ "predictions = model(predict_dataset, training=False)\n", "\n", "for i, logits in enumerate(predictions):\n", - " class_idx = tf.argmax(logits).numpy()\n", + " class_idx = tf.math.argmax(logits).numpy()\n", " p = tf.nn.softmax(logits)[class_idx]\n", " name = class_names[class_idx]\n", " print(\"Example {} prediction: {} ({:4.1f}%)\".format(i, name, 100*p))" diff --git a/site/en/tutorials/customization/images/full_network_penguin.png b/site/en/tutorials/customization/images/full_network_penguin.png new file mode 100644 index 00000000000..3fb940bd8bf Binary files /dev/null and b/site/en/tutorials/customization/images/full_network_penguin.png differ diff --git a/site/en/tutorials/customization/images/penguins_ds_species.png b/site/en/tutorials/customization/images/penguins_ds_species.png new file mode 100644 index 00000000000..736ae89b686 Binary files /dev/null and b/site/en/tutorials/customization/images/penguins_ds_species.png differ diff --git a/site/en/tutorials/distribute/custom_training.ipynb b/site/en/tutorials/distribute/custom_training.ipynb index da45c340b1a..d14b0ac003c 100644 --- a/site/en/tutorials/distribute/custom_training.ipynb +++ b/site/en/tutorials/distribute/custom_training.ipynb @@ -68,9 +68,9 @@ "id": "FbVhjPpzn6BM" }, "source": [ - "This tutorial demonstrates how to use 
[`tf.distribute.Strategy`](https://www.tensorflow.org/guide/distributed_training) with custom training loops. We will train a simple CNN model on the fashion MNIST dataset. The fashion MNIST dataset contains 60000 train images of size 28 x 28 and 10000 test images of size 28 x 28.\n", + "This tutorial demonstrates how to use `tf.distribute.Strategy`—a TensorFlow API that provides an abstraction for [distributing your training](../../guide/distributed_training.ipynb) across multiple processing units (GPUs, multiple machines, or TPUs)—with custom training loops. In this example, you will train a simple convolutional neural network on the [Fashion MNIST dataset](https://github.com/zalandoresearch/fashion-mnist) containing 70,000 images of size 28 x 28.\n", "\n", - "We are using custom training loops to train our model because they give us flexibility and a greater control on training. Moreover, it is easier to debug the model and the training loop." + "[Custom training loops](../customization/custom_training_walkthrough.ipynb) provide flexibility and a greater control on training. They also make it easier to debug the model and the training loop." ] }, { @@ -97,7 +97,7 @@ "id": "MM6W__qraV55" }, "source": [ - "## Download the fashion MNIST dataset" + "## Download the Fashion MNIST dataset" ] }, { @@ -112,14 +112,14 @@ "\n", "(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()\n", "\n", - "# Adding a dimension to the array -> new shape == (28, 28, 1)\n", - "# We are doing this because the first layer in our model is a convolutional\n", + "# Add a dimension to the array -> new shape == (28, 28, 1)\n", + "# This is done because the first layer in our model is a convolutional\n", "# layer and it requires a 4D input (batch_size, height, width, channels).\n", "# batch_size dimension will be added later on.\n", "train_images = train_images[..., None]\n", "test_images = test_images[..., None]\n", "\n", - "# Getting the images in [0, 1] range.\n", + "# Scale the images to the [0, 1] range.\n", "train_images = train_images / np.float32(255)\n", "test_images = test_images / np.float32(255)" ] @@ -141,13 +141,13 @@ "source": [ "How does `tf.distribute.MirroredStrategy` strategy work?\n", "\n", - "* All the variables and the model graph is replicated on the replicas.\n", + "* All the variables and the model graph are replicated across the replicas.\n", "* Input is evenly distributed across the replicas.\n", "* Each replica calculates the loss and gradients for the input it received.\n", - "* The gradients are synced across all the replicas by summing them.\n", + "* The gradients are synced across all the replicas by **summing** them.\n", "* After the sync, the same update is made to the copies of the variables on each replica.\n", "\n", - "Note: You can put all the code below inside a single scope. We are dividing it into several code cells for illustration purposes.\n" + "Note: You can put all the code below inside a single scope. 
This example divides it into several code cells for illustration purposes.\n" ] }, { @@ -158,8 +158,8 @@ }, "outputs": [], "source": [ - "# If the list of devices is not specified in the\n", - "# `tf.distribute.MirroredStrategy` constructor, it will be auto-detected.\n", + "# If the list of devices is not specified in\n", + "# `tf.distribute.MirroredStrategy` constructor, they will be auto-detected.\n", "strategy = tf.distribute.MirroredStrategy()" ] }, @@ -171,7 +171,7 @@ }, "outputs": [], "source": [ - "print ('Number of devices: {}'.format(strategy.num_replicas_in_sync))" + "print('Number of devices: {}'.format(strategy.num_replicas_in_sync))" ] }, { @@ -183,15 +183,6 @@ "## Setup input pipeline" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "0Qb6nDgxiN_n" - }, - "source": [ - "Export the graph and the variables to the platform-agnostic SavedModel format. After your model is saved, you can load it with or without the scope." - ] - }, { "cell_type": "code", "execution_count": null, @@ -225,8 +216,8 @@ }, "outputs": [], "source": [ - "train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE) \n", - "test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE) \n", + "train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE)\n", "\n", "train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)\n", "test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)" @@ -240,7 +231,7 @@ "source": [ "## Create the model\n", "\n", - "Create a model using `tf.keras.Sequential`. You can also use the Model Subclassing API to do this." + "Create a model using `tf.keras.Sequential`. You can also use the [Model Subclassing API](https://www.tensorflow.org/guide/keras/custom_layers_and_models) or the [functional API](https://www.tensorflow.org/guide/keras/functional) to do this." ] }, { @@ -252,14 +243,21 @@ "outputs": [], "source": [ "def create_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(32, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(32, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.MaxPooling2D(),\n", - " tf.keras.layers.Conv2D(64, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(64, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.MaxPooling2D(),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(64, activation='relu'),\n", - " tf.keras.layers.Dense(10)\n", + " tf.keras.layers.Dense(64,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10, kernel_regularizer=regularizer)\n", " ])\n", "\n", " return model" @@ -286,25 +284,29 @@ "source": [ "## Define the loss function\n", "\n", - "Normally, on a single machine with 1 GPU/CPU, loss is divided by the number of examples in the batch of input.\n", + "Recall that the loss function consists of one or two parts:\n", "\n", - "*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*\n", + " * The **prediction loss** measures how far off the model's predictions are from the training labels for a batch of training examples. 
It is computed for each labeled example and then reduced across the batch by computing the average value.\n", + " * Optionally, **regularization loss** terms can be added to the prediction loss, to steer the model away from overfitting the training data. A common choice is L2 regularization, which adds a small fixed multiple of the sum of squares of all model weights, independent of the number of examples. The model above uses L2 regularization to demonstrate its handling in the training loop below.\n", + "\n", + "For training on a single machine with a single GPU/CPU, this works as follows:\n", "\n", - "* For an example, let's say you have 4 GPU's and a batch size of 64. One batch of input is distributed\n", - "across the replicas (4 GPUs), each replica getting an input of size 16.\n", + " * The prediction loss is computed for each example in the batch, summed across the batch, and then divided by the batch size.\n", + " * The regularization loss is added to the prediction loss.\n", + " * The gradient of the total loss is computed w.r.t. each model weight, and the optimizer updates each model weight from the corresponding gradient.\n", + "\n", + "With `tf.distribute.Strategy`, the input batch is split between replicas.\n", + "For example, let's say you have 4 GPUs, each with one replica of the model. One batch of 256 input examples is distributed evenly across the 4 replicas, so each replica gets a batch of size 64: We have `256 = 4*64`, or generally `GLOBAL_BATCH_SIZE = num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`.\n", "\n", - "* The model on each replica does a forward pass with its respective input and calculates the loss. Now, instead of dividing the loss by the number of examples in its respective input (BATCH_SIZE_PER_REPLICA = 16), the loss should be divided by the GLOBAL_BATCH_SIZE (64)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OCIcsaeoIHJX" - }, - "source": [ - "*Why do this?*\n", + "Each replica computes the loss from the training examples it gets and computes the gradients of the loss w.r.t. each model weight. The optimizer takes care that these **gradients are summed up across replicas** before using them to update the copies of the model weights on each replica.\n", + "\n", + "*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*\n", + "\n", + " * Each replica computes the prediction loss for all examples distributed to it, sums up the results and divides them by `num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`, or equivalently, `GLOBAL_BATCH_SIZE`.\n", + " * Each replica computes the regularization loss(es) and divides them by\n", + " `num_replicas_in_sync`.\n", + "\n", - "* This needs to be done because after the gradients are calculated on each replica, they are synced across the replicas by **summing** them." + "Compared to non-distributed training, all per-replica loss terms are scaled down by a factor of `1/num_replicas_in_sync`. On the other hand, all loss terms -- or rather, their gradients -- are summed across that number of replicas before the optimizer applies them. In effect, the optimizer on each replica uses the same gradients as if a non-distributed computation with `GLOBAL_BATCH_SIZE` had happened. This is consistent with the distributed and undistributed behavior of Keras `Model.fit`. See the [Distributed training with Keras](./keras.ipynb) tutorial on how a larger global batch size enables scaling up the learning rate."
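A tiny numeric sketch of this scaling rule, with made-up per-example loss values: each replica divides the sum of its own prediction losses by the global batch size, so summing the per-replica results reproduces the plain, non-distributed mean loss.

```python
import tensorflow as tf

# Made-up per-example prediction losses for a global batch of 4 examples,
# split across 2 replicas with 2 examples each (illustrative values only).
losses_replica_0 = tf.constant([2.0, 3.0])
losses_replica_1 = tf.constant([4.0, 5.0])
GLOBAL_BATCH_SIZE = 4

# Each replica divides the sum of its losses by the *global* batch size ...
scaled_0 = tf.reduce_sum(losses_replica_0) / GLOBAL_BATCH_SIZE  # 1.25
scaled_1 = tf.reduce_sum(losses_replica_1) / GLOBAL_BATCH_SIZE  # 2.25

# ... so the sum across replicas equals the non-distributed mean loss.
print((scaled_0 + scaled_1).numpy())  # 3.5
print(tf.reduce_mean(tf.concat([losses_replica_0, losses_replica_1], 0)).numpy())  # 3.5
```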
] }, { @@ -315,31 +317,18 @@ "source": [ "*How to do this in TensorFlow?*\n", "\n", - "* If you're writing a custom training loop, as in this tutorial, you should sum the per example losses and divide the sum by the GLOBAL_BATCH_SIZE: \n", - "`scale_loss = tf.reduce_sum(loss) * (1. / GLOBAL_BATCH_SIZE)`\n", - "or you can use `tf.nn.compute_average_loss` which takes the per example loss,\n", - "optional sample weights, and GLOBAL_BATCH_SIZE as arguments and returns the scaled loss.\n", - "\n", - "* If you are using regularization losses in your model then you need to scale\n", - "the loss value by number of replicas. You can do this by using the `tf.nn.scale_regularization_loss` function.\n", + " * Loss reduction and scaling is done automatically in Keras `Model.compile` and `Model.fit`\n", "\n", - "* Using `tf.reduce_mean` is not recommended. Doing so divides the loss by actual per replica batch size which may vary step to step.\n", + " * If you're writing a custom training loop, as in this tutorial, you should sum the per-example losses and divide the sum by the global batch size using `tf.nn.compute_average_loss`, which takes the per-example losses and\n", + "optional sample weights as arguments and returns the scaled loss.\n", "\n", - "* This reduction and scaling is done automatically in keras `model.compile` and `model.fit`\n", + " * If using `tf.keras.losses` classes (as in the example below), the loss reduction needs to be explicitly specified to be one of `NONE` or `SUM`. The default `AUTO` and `SUM_OVER_BATCH_SIZE` are disallowed outside `Model.fit`.\n", + " * `AUTO` is disallowed because the user should explicitly think about what reduction they want to make sure it is correct in the distributed case.\n", + " * `SUM_OVER_BATCH_SIZE` is disallowed because currently it would only divide by per replica batch size, and leave the dividing by number of replicas to the user, which might be easy to miss. So, instead, you need to do the reduction yourself explicitly.\n", "\n", - "* If using `tf.keras.losses` classes (as in the example below), the loss reduction needs to be explicitly specified to be one of `NONE` or `SUM`. `AUTO` and `SUM_OVER_BATCH_SIZE` are disallowed when used with `tf.distribute.Strategy`. `AUTO` is disallowed because the user should explicitly think about what reduction they want to make sure it is correct in the distributed case. `SUM_OVER_BATCH_SIZE` is disallowed because currently it would only divide by per replica batch size, and leave the dividing by number of replicas to the user, which might be easy to miss. So instead we ask the user do the reduction themselves explicitly.\n", - "* If `labels` is multi-dimensional, then average the `per_example_loss` across the number of elements in each sample. For example, if the shape of `predictions` is `(batch_size, H, W, n_classes)` and `labels` is `(batch_size, H, W)`, you will need to update `per_example_loss` like: `per_example_loss /= tf.cast(tf.reduce_prod(tf.shape(labels)[1:]), tf.float32)`\n", + " * If you're writing a custom training loop for a model with a non-empty list of `Model.losses` (e.g., weight regularizers), you should sum them up and divide the sum by the number of replicas. You can do this by using the `tf.nn.scale_regularization_loss` function. The model code itself remains unaware of the number of replicas.\n", "\n", - " Caution: **Verify the shape of your loss**. \n", - " Loss functions in `tf.losses`/`tf.keras.losses` typically\n", - " return the average over the last dimension of the input. 
The loss\n", - " classes wrap these functions. Passing `reduction=Reduction.NONE` when\n", - " creating an instance of a loss class means \"no **additional** reduction\".\n", - " For categorical losses with an example input shape of `[batch, W, H, n_classes]` the `n_classes`\n", - " dimension is reduced. For pointwise losses like\n", - " `losses.mean_squared_error` or `losses.binary_crossentropy` include a\n", - " dummy axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without\n", - " the dummy axis `[batch, W, H]` will be incorrectly reduced to `[batch, W]`.\n" + " However, models can define input-dependent regularization losses with Keras APIs such as `Layer.add_loss(...)` and `Layer(activity_regularizer=...)`. For `Layer.add_loss(...)`, it falls on the modeling code to perform the division of the summed per-example terms by the per-replica(!) batch size, e.g., by using `tf.math.reduce_mean()`." ] }, { @@ -351,14 +340,51 @@ "outputs": [], "source": [ "with strategy.scope():\n", - " # Set reduction to `none` so we can do the reduction afterwards and divide by\n", - " # global batch size.\n", + " # Set reduction to `NONE` so you can do the reduction yourself.\n", " loss_object = tf.keras.losses.SparseCategoricalCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)\n", - " def compute_loss(labels, predictions):\n", + " def compute_loss(labels, predictions, model_losses):\n", " per_example_loss = loss_object(labels, predictions)\n", - " return tf.nn.compute_average_loss(per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE)" + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + " return loss" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6pM96bqQY52D" + }, + "source": [ + "### Special cases\n", + "\n", + "Advanced users should also consider the following special cases.\n", + "\n", + " * Input batches shorter than `GLOBAL_BATCH_SIZE` create unpleasant corner cases in several places. In practice, it often works best to avoid them by allowing batches to span epoch boundaries using `Dataset.repeat().batch()` and defining approximate epochs by step counts, not dataset ends. Alternatively, `Dataset.batch(drop_remainder=True)` maintains the notion of epoch but drops the last few examples.\n", + "\n", + " For illustration, this example goes the harder route and allows short batches, so that each training epoch contains each training example exactly once.\n", + " \n", + " Which denominator should be used by `tf.nn.compute_average_loss()`?\n", + "\n", + " * By default, in the example code above and equivalently in `Keras.fit()`, the sum of prediction losses is divided by `num_replicas_in_sync` times the actual batch size seen on the replica (with empty batches silently ignored). This preserves the balance between the prediction loss on the one hand and the regularization losses on the other hand. It is particularly appropriate for models that use input-dependent regularization losses. Plain L2 regularization just superimposes weight decay onto the gradients of the prediction loss and is less in need of such a balance.\n", + " * In practice, many custom training loops pass as a constant Python value into `tf.nn.compute_average_loss(..., global_batch_size=GLOBAL_BATCH_SIZE)` to use it as the denominator. This preserves the relative weighting of training examples between batches. 
Without it, the smaller denominator in short batches effectively upweights the examples in those. (Before TensorFlow 2.13, this was also needed to avoid NaNs in case some replica received an actual batch size of zero.)\n", + " \n", + " Both options are equivalent if short batches are avoided, as suggested above.\n", + "\n", + " * Multi-dimensional `labels` require you to average the `per_example_loss` across the number of predictions in each example. Consider a classification task for all pixels of an input image, with `predictions` of shape `(batch_size, H, W, n_classes)` and `labels` of shape `(batch_size, H, W)`. You will need to update `per_example_loss` like: `per_example_loss /= tf.cast(tf.reduce_prod(tf.shape(labels)[1:]), tf.float32)`\n", + "\n", + " Caution: **Verify the shape of your loss**.\n", + " Loss functions in `tf.losses`/`tf.keras.losses` typically\n", + " return the average over the last dimension of the input. The loss\n", + " classes wrap these functions. Passing `reduction=Reduction.NONE` when\n", + " creating an instance of a loss class means \"no **additional** reduction\".\n", + " For categorical losses with an example input shape of `[batch, W, H, n_classes]` the `n_classes`\n", + " dimension is reduced. For pointwise losses like\n", + " `losses.mean_squared_error` or `losses.binary_crossentropy` include a\n", + " dummy axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without\n", + " the dummy axis `[batch, W, H]` will be incorrectly reduced to `[batch, W]`." ] }, { @@ -406,11 +432,11 @@ }, "outputs": [], "source": [ - "# model, optimizer, and checkpoint must be created under `strategy.scope`.\n", + "# A model, an optimizer, and a checkpoint must be created under `strategy.scope`.\n", "with strategy.scope():\n", " model = create_model()\n", "\n", - " optimizer = tf.keras.optimizers.Adam()\n", + " optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n", "\n", " checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)" ] @@ -428,13 +454,13 @@ "\n", " with tf.GradientTape() as tape:\n", " predictions = model(images, training=True)\n", - " loss = compute_loss(labels, predictions)\n", + " loss = compute_loss(labels, predictions, model.losses)\n", "\n", " gradients = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", "\n", " train_accuracy.update_state(labels, predictions)\n", - " return loss \n", + " return loss\n", "\n", "def test_step(inputs):\n", " images, labels = inputs\n", @@ -484,9 +510,9 @@ "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, \"\n", " \"Test Accuracy: {}\")\n", - " print (template.format(epoch+1, train_loss,\n", - " train_accuracy.result()*100, test_loss.result(),\n", - " test_accuracy.result()*100))\n", + " print(template.format(epoch + 1, train_loss,\n", + " train_accuracy.result() * 100, test_loss.result(),\n", + " test_accuracy.result() * 100))\n", "\n", " test_loss.reset_states()\n", " train_accuracy.reset_states()\n", @@ -499,12 +525,12 @@ "id": "Z1YvXqOpwy08" }, "source": [ - "Things to note in the example above:\n", + "### Things to note in the example above\n", "\n", - "* We are iterating over the `train_dist_dataset` and `test_dist_dataset` using a `for x in ...` construct.\n", + "* Iterate over the `train_dist_dataset` and `test_dist_dataset` using a `for x in ...` construct.\n", "* The scaled loss is the return value of the `distributed_train_step`. 
This value is aggregated across replicas using the `tf.distribute.Strategy.reduce` call and then across batches by summing the return value of the `tf.distribute.Strategy.reduce` calls.\n", "* `tf.keras.Metrics` should be updated inside `train_step` and `test_step` that gets executed by `tf.distribute.Strategy.run`.\n", - "*`tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can do `tf.distribute.Strategy.reduce` to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n" + "* `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can do `tf.distribute.Strategy.reduce` to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n" ] }, { @@ -570,8 +596,8 @@ "for images, labels in test_dataset:\n", " eval_step(images, labels)\n", "\n", - "print ('Accuracy after restoring the saved model without strategy: {}'.format(\n", - " eval_accuracy.result()*100))" + "print('Accuracy after restoring the saved model without strategy: {}'.format(\n", + " eval_accuracy.result() * 100))" ] }, { @@ -584,7 +610,7 @@ "\n", "### Using iterators\n", "\n", - "If you want to iterate over a given number of steps and not through the entire dataset you can create an iterator using the `iter` call and explicity call `next` on the iterator. You can choose to iterate over the dataset both inside and outside the tf.function. Here is a small snippet demonstrating iteration of the dataset outside the tf.function using an iterator.\n" + "If you want to iterate over a given number of steps and not through the entire dataset, you can create an iterator using the `iter` call and explicitly call `next` on the iterator. You can choose to iterate over the dataset both inside and outside the `tf.function`. Here is a small snippet demonstrating iteration of the dataset outside the `tf.function` using an iterator.\n" ] }, { @@ -606,7 +632,7 @@ " average_train_loss = total_loss / num_batches\n", "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}\")\n", - " print (template.format(epoch+1, average_train_loss, train_accuracy.result()*100))\n", + " print(template.format(epoch + 1, average_train_loss, train_accuracy.result() * 100))\n", " train_accuracy.reset_states()" ] }, @@ -616,8 +642,9 @@ "id": "GxVp48Oy0m6y" }, "source": [ - "### Iterating inside a tf.function\n", - "You can also iterate over the entire input `train_dist_dataset` inside a tf.function using the `for x in ...` construct or by creating iterators like we did above. The example below demonstrates wrapping one epoch of training in a tf.function and iterating over `train_dist_dataset` inside the function." + "### Iterating inside a `tf.function`\n", + "\n", + "You can also iterate over the entire input `train_dist_dataset` inside a `tf.function` using the `for x in ...` construct or by creating iterators like you did above. The example below demonstrates wrapping one epoch of training with a `@tf.function` decorator and iterating over `train_dist_dataset` inside the function." 
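As a minimal, self-contained sketch of the two ways to consume per-replica results mentioned above (a toy per-replica computation, not the tutorial's model):

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

@tf.function
def step(x):
  # Toy per-replica computation: the mean of this replica's shard of the batch.
  return tf.reduce_mean(x)

dataset = tf.data.Dataset.from_tensor_slices(tf.range(8.0)).batch(4)
dist_dataset = strategy.experimental_distribute_dataset(dataset)

for dist_x in dist_dataset:
  per_replica = strategy.run(step, args=(dist_x,))
  # Aggregate the per-replica values into a single number ...
  print(strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica, axis=None))
  # ... or inspect the raw result from each local replica.
  print(strategy.experimental_local_results(per_replica))
```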
] }, { @@ -643,7 +670,7 @@ " train_loss = distributed_train_epoch(train_dist_dataset)\n", "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}\")\n", - " print (template.format(epoch+1, train_loss, train_accuracy.result()*100))\n", + " print(template.format(epoch + 1, train_loss, train_accuracy.result() * 100))\n", "\n", " train_accuracy.reset_states()" ] @@ -658,17 +685,18 @@ "\n", "Note: As a general rule, you should use `tf.keras.Metrics` to track per-sample values and avoid values that have been aggregated within a replica.\n", "\n", - "We do *not* recommend using `tf.metrics.Mean` to track the training loss across different replicas, because of the loss scaling computation that is carried out.\n", + "Because of the loss scaling computation that is carried out, it's not recommended to use `tf.keras.metrics.Mean` to track the training loss across different replicas.\n", "\n", "For example, if you run a training job with the following characteristics:\n", + "\n", "* Two replicas\n", "* Two samples are processed on each replica\n", "* Resulting loss values: [2, 3] and [4, 5] on each replica\n", "* Global batch size = 4\n", "\n", - "With loss scaling, you calculate the per-sample value of loss on each replica by adding the loss values, and then dividing by the global batch size. In this case: `(2 + 3) / 4 = 1.25` and `(4 + 5) / 4 = 2.25`. \n", + "With loss scaling, you calculate the per-sample value of loss on each replica by adding the loss values, and then dividing by the global batch size. In this case: `(2 + 3) / 4 = 1.25` and `(4 + 5) / 4 = 2.25`.\n", "\n", - "If you use `tf.metrics.Mean` to track loss across the two replicas, the result is different. In this example, you end up with a `total` of 3.50 and `count` of 2, which results in `total`/`count` = 1.75 when `result()` is called on the metric. Loss calculated with `tf.keras.Metrics` is scaled by an additional factor that is equal to the number of replicas in sync." + "If you use `tf.keras.metrics.Mean` to track loss across the two replicas, the result is different. In this example, you end up with a `total` of 3.50 and `count` of 2, which results in `total`/`count` = 1.75 when `result()` is called on the metric. Loss calculated with `tf.keras.Metrics` is scaled by an additional factor that is equal to the number of replicas in sync." ] }, { @@ -678,16 +706,17 @@ }, "source": [ "### Guide and examples\n", + "\n", "Here are some examples for using distribution strategy with custom training loops:\n", "\n", "1. [Distributed training guide](../../guide/distributed_training)\n", "2. [DenseNet](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/densenet/distributed_train.py) example using `MirroredStrategy`.\n", - "1. [BERT](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py) example trained using `MirroredStrategy` and `TPUStrategy`.\n", + "1. [BERT](https://github.com/tensorflow/models/blob/master/official/legacy/bert/run_classifier.py) example trained using `MirroredStrategy` and `TPUStrategy`.\n", "This example is particularly helpful for understanding how to load from a checkpoint and generate periodic checkpoints during distributed training etc.\n", "2. [NCF](https://github.com/tensorflow/models/blob/master/official/recommendation/ncf_keras_main.py) example trained using `MirroredStrategy` that can be enabled using the `keras_use_ctl` flag.\n", "3. 
[NMT](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/nmt_with_attention/distributed_train.py) example trained using `MirroredStrategy`.\n", "\n", - "More examples listed in the [Distribution strategy guide](../../guide/distributed_training.ipynb#examples_and_tutorials)." + "You can find more examples listed under _Examples and tutorials_ in the [Distribution strategy guide](../../guide/distributed_training.ipynb)." ] }, { @@ -699,7 +728,8 @@ "## Next steps\n", "\n", "* Try out the new `tf.distribute.Strategy` API on your models.\n", - "* Visit the [Performance section](../../guide/function.ipynb) in the guide to learn more about other strategies and [tools](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models." + "* Visit the [Better performance with `tf.function`](../../guide/function.ipynb) and [TensorFlow Profiler](../../guide/profiler.md) guides to learn more about tools to optimize the performance of your TensorFlow models.\n", + "* Check out the [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide, which provides an overview of the available distribution strategies." ] } ], @@ -707,7 +737,6 @@ "colab": { "collapsed_sections": [], "name": "custom_training.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb new file mode 100644 index 00000000000..84f6478c2b5 --- /dev/null +++ b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb @@ -0,0 +1,760 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MT-LkFOl2axM" + }, + "source": [ + "# Using DTensors with Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r6P32iYYV27b" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vTe9dcbUAwqx" + }, + "source": [ + "## Overview\n", + "\n", + "In this tutorial, you will learn how to use DTensors with Keras.\n", + "\n", + "Through DTensor integration with Keras, you can reuse your existing Keras layers and models to build and train distributed machine learning models.\n", + "\n", + "You will train a multi-layer classification model with the MNIST data. Setting the layout for subclassing model, Sequential model, and functional model will be demonstrated.\n", + "\n", + "This tutorial assumes that you have already read the [DTensor programing guide](/guide/dtensor_overview), and are familiar with basic DTensor concepts like `Mesh` and `Layout`.\n", + "\n", + "This tutorial is based on [Training a neural network on MNIST with Keras](https://www.tensorflow.org/datasets/keras_example)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "keIyP3IoA1o4" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow Datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4dHik7NYA5vm" + }, + "outputs": [], + "source": [ + "!pip install --quiet --upgrade tensorflow-datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VttBMZngDx8x" + }, + "source": [ + "Next, import TensorFlow and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "\n", + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CodX6idGBGSm" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "from tensorflow.experimental import dtensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAtvrpasDpDD" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(\n", + " phy_devices[0], \n", + " [tf.config.LogicalDeviceConfiguration()] * ncpu)\n", + " \n", + "configure_virtual_cpus(8)\n", + "tf.config.list_logical_devices('CPU')\n", + "\n", + "devices = [f'CPU:{i}' for i in range(8)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ogULE1OHtyd9" + }, + "source": [ + "## Deterministic pseudo-random number generators\n", + "One thing you should note is that DTensor API requires each of the running client to have the same random seeds, so that it could have deterministic behavior for initializing the weights. You can achieve this by setting the global seeds in keras via `tf.keras.utils.set_random_seed()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9u85YypguL8N" + }, + "outputs": [], + "source": [ + "tf.keras.backend.experimental.enable_tf_random_generator()\n", + "tf.keras.utils.set_random_seed(1337)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tO11XvPDAu3_" + }, + "source": [ + "## Creating a Data Parallel Mesh\n", + "\n", + "This tutorial demonstrates Data Parallel training. Adapting to Model Parallel training and Spatial Parallel training can be as simple as switching to a different set of `Layout` objects. 
Refer to the [Distributed training with DTensors](dtensor_ml_tutorial.ipynb) tutorial for more information on distributed training beyond Data Parallel.\n", + "\n", + "Data Parallel training is a commonly used parallel training scheme, also used by, for example, `tf.distribute.MirroredStrategy`.\n", + "\n", + "With DTensor, a Data Parallel training loop uses a `Mesh` that consists of a single 'batch' dimension, where each device runs a replica of the model that receives a shard from the global batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6sT6s6z4j9H-" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=devices)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rouFcF6FE0aF" + }, + "source": [ + "As each device runs a full replica of the model, the model variables should be fully replicated across the mesh (unsharded). As an example, a fully replicated Layout for a rank-2 weight on this `Mesh` would be as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U8OxvkDKE1Nu" + }, + "outputs": [], + "source": [ + "example_weight_layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)  # or\n", + "example_weight_layout = dtensor.Layout.replicated(mesh, rank=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Bnic98RE0xi" + }, + "source": [ + "A layout for a rank-2 data tensor on this `Mesh` would be sharded along the first dimension (sometimes known as `batch_sharded`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PhYp0EKBFfxt" + }, + "outputs": [], + "source": [ + "example_data_layout = dtensor.Layout(['batch', dtensor.UNSHARDED], mesh)  # or\n", + "example_data_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4U-6n0DericV" + }, + "source": [ + "## Create Keras layers with layout\n", + "\n", + "In the data parallel scheme, you usually create your model weights with a fully replicated layout, so that each replica of the model can do calculations with the sharded input data. \n", + "\n", + "In order to configure the layout information for your layers' weights, Keras has exposed an extra parameter in the layer constructor for most of the built-in layers.\n", + "\n", + "The following example builds a small image classification model with a fully replicated weight layout. You can specify the layout information for the `kernel` and `bias` weights in `tf.keras.layers.Dense` via the `kernel_layout` and `bias_layout` arguments. Most of the built-in Keras layers are ready for explicitly specifying the `Layout` for the layer weights." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Koc5GlA1tFXY" + }, + "outputs": [], + "source": [ + "unsharded_layout_2d = dtensor.Layout.replicated(mesh, 2)\n", + "unsharded_layout_1d = dtensor.Layout.replicated(mesh, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GfOGTIxGs5Ql" + }, + "outputs": [], + "source": [ + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dense(128, \n", + " activation='relu',\n", + " name='d1',\n", + " kernel_layout=unsharded_layout_2d, \n", + " bias_layout=unsharded_layout_1d),\n", + " tf.keras.layers.Dense(10,\n", + " name='d2',\n", + " kernel_layout=unsharded_layout_2d, \n", + " bias_layout=unsharded_layout_1d)\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0frf3jsVtx_n" + }, + "source": [ + "You can check the layout information by examining the `layout` property on the weights." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z_nqv_VdwcXo" + }, + "outputs": [], + "source": [ + "for weight in model.weights:\n", + " print(f'Weight name: {weight.name} with layout: {weight.layout}')\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6FMGB-QsxPtU" + }, + "source": [ + "## Load a dataset and build input pipeline\n", + "\n", + "Load a MNIST dataset and configure some pre-processing input pipeline for it. The dataset itself is not associated with any DTensor layout information." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zGt4kwltxOt4" + }, + "outputs": [], + "source": [ + "(ds_train, ds_test), ds_info = tfds.load(\n", + " 'mnist',\n", + " split=['train', 'test'],\n", + " shuffle_files=True,\n", + " as_supervised=True,\n", + " with_info=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HkUaOB_ryaLH" + }, + "outputs": [], + "source": [ + "def normalize_img(image, label):\n", + " \"\"\"Normalizes images: `uint8` -> `float32`.\"\"\"\n", + " return tf.cast(image, tf.float32) / 255., label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Efm2H1iqydan" + }, + "outputs": [], + "source": [ + "batch_size = 128\n", + "\n", + "ds_train = ds_train.map(\n", + " normalize_img, num_parallel_calls=tf.data.AUTOTUNE)\n", + "ds_train = ds_train.cache()\n", + "ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)\n", + "ds_train = ds_train.batch(batch_size)\n", + "ds_train = ds_train.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lcrg6QAtyis4" + }, + "outputs": [], + "source": [ + "ds_test = ds_test.map(\n", + " normalize_img, num_parallel_calls=tf.data.AUTOTUNE)\n", + "ds_test = ds_test.batch(batch_size)\n", + "ds_test = ds_test.cache()\n", + "ds_test = ds_test.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fHEZwib7lhqn" + }, + "source": [ + "## Define the training logic for the model\n", + "\n", + "Next, define the training and evaluation logic for the model. \n", + "\n", + "As of TensorFlow 2.9, you have to write a custom-training-loop for a DTensor-enabled Keras model. This is to pack the input data with proper layout information, which is not integrated with the standard `tf.keras.Model.fit()` or `tf.keras.Model.eval()` functions from Keras. 
you will get more `tf.data` support in the upcoming release. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CAx11gMjzzjs" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def train_step(model, x, y, optimizer, metrics):\n", + " with tf.GradientTape() as tape:\n", + " logits = model(x, training=True)\n", + " # tf.reduce_sum sums the batch sharded per-example loss to a replicated\n", + " # global loss (scalar).\n", + " loss = tf.reduce_sum(tf.keras.losses.sparse_categorical_crossentropy(\n", + " y, logits, from_logits=True))\n", + " \n", + " gradients = tape.gradient(loss, model.trainable_variables)\n", + " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + "\n", + " for metric in metrics.values():\n", + " metric.update_state(y_true=y, y_pred=logits)\n", + "\n", + " loss_per_sample = loss / len(x)\n", + " results = {'loss': loss_per_sample}\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "maSTWeRemO0P" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def eval_step(model, x, y, metrics):\n", + " logits = model(x, training=False)\n", + " loss = tf.reduce_sum(tf.keras.losses.sparse_categorical_crossentropy(\n", + " y, logits, from_logits=True))\n", + "\n", + " for metric in metrics.values():\n", + " metric.update_state(y_true=y, y_pred=logits)\n", + "\n", + " loss_per_sample = loss / len(x)\n", + " results = {'eval_loss': loss_per_sample}\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dt00axcLmvLr" + }, + "outputs": [], + "source": [ + "def pack_dtensor_inputs(images, labels, image_layout, label_layout):\n", + " num_local_devices = image_layout.mesh.num_local_devices()\n", + " images = tf.split(images, num_local_devices)\n", + " labels = tf.split(labels, num_local_devices)\n", + " images = dtensor.pack(images, image_layout)\n", + " labels = dtensor.pack(labels, label_layout)\n", + " return images, labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9Eb-qIJGrxB9" + }, + "source": [ + "## Metrics and optimizers\n", + "\n", + "When using DTensor API with Keras `Metric` and `Optimizer`, you will need to provide the extra mesh information, so that any internal state variables and tensors can work with variables in the model.\n", + "\n", + "- For an optimizer, DTensor introduces a new experimental namespace `keras.dtensor.experimental.optimizers`, where many existing Keras Optimizers are extended to receive an additional `mesh` argument. In future releases, it may be merged with Keras core optimizers.\n", + "\n", + "- For metrics, you can directly specify the `mesh` to the constructor as an argument to make it a DTensor compatible `Metric`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1lu_0mz1sxrl" + }, + "outputs": [], + "source": [ + "optimizer = tf.keras.dtensor.experimental.optimizers.Adam(0.01, mesh=mesh)\n", + "metrics = {'accuracy': tf.keras.metrics.SparseCategoricalAccuracy(mesh=mesh)}\n", + "eval_metrics = {'eval_accuracy': tf.keras.metrics.SparseCategoricalAccuracy(mesh=mesh)}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QzufrkistELx" + }, + "source": [ + "## Train the model\n", + "\n", + "The following example demonstrates how to shard the data from input pipeline on the batch dimension, and train with the model, which has fully replicated weights. 
\n", + "\n", + "After 3 epochs, the model should achieve about 97% of accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kZW568Dk0vvL" + }, + "outputs": [], + "source": [ + "num_epochs = 3\n", + "\n", + "image_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=4)\n", + "label_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)\n", + "\n", + "for epoch in range(num_epochs):\n", + " print(\"============================\") \n", + " print(\"Epoch: \", epoch)\n", + " for metric in metrics.values():\n", + " metric.reset_state()\n", + " step = 0\n", + " results = {}\n", + " pbar = tf.keras.utils.Progbar(target=None, stateful_metrics=[])\n", + " for input in ds_train:\n", + " images, labels = input[0], input[1]\n", + " images, labels = pack_dtensor_inputs(\n", + " images, labels, image_layout, label_layout)\n", + "\n", + " results.update(train_step(model, images, labels, optimizer, metrics))\n", + " for metric_name, metric in metrics.items():\n", + " results[metric_name] = metric.result()\n", + "\n", + " pbar.update(step, values=results.items(), finalize=False)\n", + " step += 1\n", + " pbar.update(step, values=results.items(), finalize=True)\n", + "\n", + " for metric in eval_metrics.values():\n", + " metric.reset_state()\n", + " for input in ds_test:\n", + " images, labels = input[0], input[1]\n", + " images, labels = pack_dtensor_inputs(\n", + " images, labels, image_layout, label_layout)\n", + " results.update(eval_step(model, images, labels, eval_metrics))\n", + "\n", + " for metric_name, metric in eval_metrics.items():\n", + " results[metric_name] = metric.result()\n", + " \n", + " for metric_name, metric in results.items():\n", + " print(f\"{metric_name}: {metric.numpy()}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HYEXF6qCuoSr" + }, + "source": [ + "## Specify Layout for existing model code\n", + "\n", + "Often you have models that work well for your use case. Specifying `Layout` information to each individual layer within the model will be a large amount of work requiring a lot of edits.\n", + "\n", + "To help you easily convert your existing Keras model to work with DTensor API you can use the new `tf.keras.dtensor.experimental.LayoutMap` API that allow you to specify the `Layout` from a global point of view.\n", + "\n", + "First, you need to create a `LayoutMap` instance, which is a dictionary-like object that contains all the `Layout` you would like to specify for your model weights.\n", + "\n", + "`LayoutMap` needs a `Mesh` instance at init, which can be used to provide default replicated `Layout` for any weights that doesn't have Layout configured. In case you would like all your model weights to be just fully replicated, you can provide empty `LayoutMap`, and the default mesh will be used to create replicated `Layout`.\n", + "\n", + "`LayoutMap` uses a string as key and a `Layout` as value. There is a behavior difference between a normal Python dict and this class. The string key will be treated as a regex when retrieving the value." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCq5Nl-UP_dS" + }, + "source": [ + "### Subclassed Model\n", + "\n", + "Consider the following model defined using the Keras subclassing Model syntax." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LZ0hRFs8unu0" + }, + "outputs": [], + "source": [ + "class SubclassedModel(tf.keras.Model):\n", + "\n", + " def __init__(self, name=None):\n", + " super().__init__(name=name)\n", + " self.feature = tf.keras.layers.Dense(16)\n", + " self.feature_2 = tf.keras.layers.Dense(24)\n", + " self.dropout = tf.keras.layers.Dropout(0.1)\n", + "\n", + " def call(self, inputs, training=None):\n", + " x = self.feature(inputs)\n", + " x = self.dropout(x, training=training)\n", + " return self.feature_2(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1njxqPB-yS97" + }, + "source": [ + "There are 4 weights in this model, which are `kernel` and `bias` for two `Dense` layers. Each of them are mapped based on the object path:\n", + "\n", + "* `model.feature.kernel`\n", + "* `model.feature.bias`\n", + "* `model.feature_2.kernel`\n", + "* `model.feature_2.bias`\n", + "\n", + "Note: For subclassed Models, the attribute name, rather than the `.name` attribute of the layer, is used as the key to retrieve the Layout from the mapping. This is consistent with the convention followed by `tf.Module` checkpointing. For complex models with more than a few layers, you can [manually inspect checkpoints](https://www.tensorflow.org/guide/checkpoint#manually_inspecting_checkpoints) to view the attribute mappings. \n", + "\n", + "Now define the following `LayoutMap` and apply it to the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "goVX6iIZw468" + }, + "outputs": [], + "source": [ + "layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)\n", + "\n", + "layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)\n", + "layout_map['feature.*bias'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)\n", + "\n", + "with layout_map.scope():\n", + " subclassed_model = SubclassedModel()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M32HcSp_PyWs" + }, + "source": [ + "The model weights are created on the first call, so call the model with a DTensor input and confirm the weights have the expected layouts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c3CbD9l7qUNq" + }, + "outputs": [], + "source": [ + "dtensor_input = dtensor.copy_to_mesh(tf.zeros((16, 16)), layout=unsharded_layout_2d)\n", + "# Trigger the weights creation for subclass model\n", + "subclassed_model(dtensor_input)\n", + "\n", + "print(subclassed_model.feature.kernel.layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZyCnfd-4Q2jk" + }, + "source": [ + "With this, you can quickly map the `Layout` to your models without updating any of your existing code. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6GliUdWTQnKC" + }, + "source": [ + "### Sequential and Functional Models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6zzvTqAR2Teu" + }, + "source": [ + "For Keras Functional and Sequential models, you can use `tf.keras.dtensor.experimental.LayoutMap` as well.\n", + "\n", + "Note: For Functional and Sequential models, the mappings are slightly different. The layers in the model don't have a public attribute attached to the model (though you can access them via `Model.layers` as a list). Use the string name as the key in this case. The string name is guaranteed to be unique within a model." 
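As noted earlier, the string keys of a `LayoutMap` are treated as regular expressions when a value is retrieved, so a single entry can cover several weights. Below is a small illustrative sketch (not part of the original notebook) that assumes the `mesh` created above; the `demo_layout_map` name and the queried strings are examples only:

```python
import tensorflow as tf
from tensorflow.experimental import dtensor

# One regex-style key can cover every weight whose path matches the pattern.
demo_layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)
demo_layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)

# Both lookups match the 'feature.*kernel' pattern and return the
# batch-sharded layout configured above.
print(demo_layout_map['feature/kernel'])
print(demo_layout_map['feature_2/kernel'])
```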
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gXK2EquIRJCC" + }, + "outputs": [], + "source": [ + "layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)\n", + "\n", + "layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)\n", + "layout_map['feature.*bias'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cBzwJqrg2TH3" + }, + "outputs": [], + "source": [ + "with layout_map.scope():\n", + " inputs = tf.keras.Input((16,), batch_size=16)\n", + " x = tf.keras.layers.Dense(16, name='feature')(inputs)\n", + " x = tf.keras.layers.Dropout(0.1)(x)\n", + " output = tf.keras.layers.Dense(32, name='feature_2')(x)\n", + " model = tf.keras.Model(inputs, output)\n", + "\n", + "print(model.layers[1].kernel.layout)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pPuh1NlE3-wO" + }, + "outputs": [], + "source": [ + "with layout_map.scope():\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(16, name='feature', input_shape=(16,)),\n", + " tf.keras.layers.Dropout(0.1),\n", + " tf.keras.layers.Dense(32, name='feature_2')\n", + " ])\n", + "\n", + "print(model.layers[2].kernel.layout)" + ] + } + ], + "metadata": { + "colab": { + "name": "dtensor_keras_tutorial.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb new file mode 100644 index 00000000000..55557be6368 --- /dev/null +++ b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb @@ -0,0 +1,1070 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "# Distributed training with DTensors" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r6P32iYYV27b" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kiF4jjX4O1mF" + }, + "source": [ + "## Overview\n", + "\n", + "DTensor provides a way for you to distribute the training of your model across devices to improve efficiency, reliability and scalability. For more details, check out the [DTensor concepts](../../guide/dtensor_overview.ipynb) guide.\n", + "\n", + "In this tutorial, you will train a sentiment analysis model using DTensors. The example demonstrates three distributed training schemes:\n", + "\n", + " - Data Parallel training, where the training samples are sharded (partitioned) to devices.\n", + " - Model Parallel training, where the model variables are sharded to devices.\n", + " - Spatial Parallel training, where the features of input data are sharded to devices (also known as [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus)).\n", + "\n", + "The training portion of this tutorial is inspired by a Kaggle notebook called [A Kaggle guide on sentiment analysis](https://www.kaggle.com/code/anasofiauzsoy/yelp-review-sentiment-analysis-tensorflow-tfds/notebook). To learn about the complete training and evaluation workflow (without DTensor), refer to that notebook.\n", + "\n", + "This tutorial will walk through the following steps:\n", + "\n", + "- Some data cleaning to obtain a `tf.data.Dataset` of tokenized sentences and their polarity.\n", + "- Then, building an MLP model with custom Dense and BatchNorm layers using a `tf.Module` to track the inference variables. The model constructor will take additional `Layout` arguments to control the sharding of variables.\n", + "- For training, you will first use data parallel training together with `tf.experimental.dtensor`'s checkpoint feature. Then, you will continue with Model Parallel Training and Spatial Parallel Training.\n", + "- The final section briefly describes the interaction between `tf.saved_model` and `tf.experimental.dtensor` as of TensorFlow 2.9." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YD80veeg7QtW" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow Datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-RKXLJN-7Yyb" + }, + "outputs": [], + "source": [ + "!pip install --quiet --upgrade tensorflow-datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tcxP4_Zu7ciQ" + }, + "source": [ + "Next, import `tensorflow` and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "\n", + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dXcB26oP7dUd" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import numpy as np\n", + "import tensorflow_datasets as tfds\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from tensorflow.experimental import dtensor\n", + "\n", + "print('TensorFlow version:', tf.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oHtO6MJLUXlz" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(8)\n", + "DEVICES = [f'CPU:{i}' for i in range(8)]\n", + "\n", + "tf.config.list_logical_devices('CPU')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "omYd4jbF7j_I" + }, + "source": [ + "## Download the dataset\n", + "\n", + "Download the IMDB reviews data set to train the sentiment analysis model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fW4w4QlFVHhx" + }, + "outputs": [], + "source": [ + "train_data = tfds.load('imdb_reviews', split='train', shuffle_files=True, batch_size=64)\n", + "train_data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ki3mpfi4aZH8" + }, + "source": [ + "## Prepare the data\n", + "\n", + "First tokenize the text. Here use an extension of one-hot encoding, the `'tf_idf'` mode of `tf.keras.layers.TextVectorization`.\n", + "\n", + "- For the sake of speed, limit the number of tokens to 1200.\n", + "- To keep the `tf.Module` simple, run `TextVectorization` as a preprocessing step before the training.\n", + "\n", + "The final result of the data cleaning section is a `Dataset` with the tokenized text as `x` and label as `y`.\n", + "\n", + "**Note**: Running `TextVectorization` as a preprocessing step is **neither a usual practice nor a recommended one** as doing so assumes the training data fits into the client memory, which is not always the case.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zNpxjku_57Lg" + }, + "outputs": [], + "source": [ + "text_vectorization = tf.keras.layers.TextVectorization(output_mode='tf_idf', max_tokens=1200, output_sequence_length=None)\n", + "text_vectorization.adapt(data=train_data.map(lambda x: x['text']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q16bjngoVwQp" + }, + "outputs": [], + "source": [ + "def vectorize(features):\n", + " return text_vectorization(features['text']), features['label']\n", + "\n", + "train_data_vec = train_data.map(vectorize)\n", + "train_data_vec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atTqL9kE5wz4" + }, + "source": [ + "## Build a neural network with DTensor\n", + "\n", + "Now build a Multi-Layer Perceptron (MLP) network with `DTensor`. 
The network will use fully connected Dense and BatchNorm layers.\n", + "\n", + "`DTensor` expands TensorFlow through single-program multi-data (SPMD) expansion of regular TensorFlow Ops according to the `dtensor.Layout` attributes of their input `Tensor` and variables.\n", + "\n", + "Variables of `DTensor` aware layers are `dtensor.DVariable`, and the constructors of `DTensor` aware layer objects take additional `Layout` inputs in addition to the usual layer parameters.\n", + "\n", + "Note: As of TensorFlow 2.9, Keras layers such as `tf.keras.layer.Dense`, and `tf.keras.layer.BatchNormalization` accepts `dtensor.Layout` arguments. Refer to the [DTensor Keras Integration Tutorial](/tutorials/distribute/dtensor_keras_tutorial) for more information using Keras with DTensor." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PMCt-Gj3b3Jy" + }, + "source": [ + "### Dense Layer\n", + "\n", + "The following custom Dense layer defines 2 layer variables: $W_{ij}$ is the variable for weights, and $b_i$ is the variable for the biases.\n", + "\n", + "$$\n", + "y_j = \\sigma(\\sum_i x_i W_{ij} + b_j)\n", + "$$\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nYlFUJWNjl4N" + }, + "source": [ + "### Layout deduction\n", + "\n", + "This result comes from the following observations:\n", + "\n", + "- The preferred DTensor sharding for operands to a matrix dot product $t_j = \\sum_i x_i W_{ij}$ is to shard $\\mathbf{W}$ and $\\mathbf{x}$ the same way along the $i$-axis.\n", + "\n", + "- The preferred DTensor sharding for operands to a matrix sum $t_j + b_j$, is to shard $\\mathbf{t}$ and $\\mathbf{b}$ the same way along the $j$-axis.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VpKblz7Yb16G" + }, + "outputs": [], + "source": [ + "class Dense(tf.Module):\n", + "\n", + " def __init__(self, input_size, output_size,\n", + " init_seed, weight_layout, activation=None):\n", + " super().__init__()\n", + "\n", + " random_normal_initializer = tf.function(tf.random.stateless_normal)\n", + "\n", + " self.weight = dtensor.DVariable(\n", + " dtensor.call_with_layout(\n", + " random_normal_initializer, weight_layout,\n", + " shape=[input_size, output_size],\n", + " seed=init_seed\n", + " ))\n", + " if activation is None:\n", + " activation = lambda x:x\n", + " self.activation = activation\n", + " \n", + " # bias is sharded the same way as the last axis of weight.\n", + " bias_layout = weight_layout.delete([0])\n", + "\n", + " self.bias = dtensor.DVariable(\n", + " dtensor.call_with_layout(tf.zeros, bias_layout, [output_size]))\n", + "\n", + " def __call__(self, x):\n", + " y = tf.matmul(x, self.weight) + self.bias\n", + " y = self.activation(y)\n", + "\n", + " return y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tfVY_vAKbxM0" + }, + "source": [ + "### BatchNorm\n", + "\n", + "A batch normalization layer helps avoid collapsing modes while training. In this case, adding batch normalization layers helps model training avoid producing a model that only produces zeros.\n", + "\n", + "The constructor of the custom `BatchNorm` layer below does not take a `Layout` argument. This is because `BatchNorm` has no layer variables. This still works with DTensor because 'x', the only input to the layer, is already a DTensor that represents the global batch.\n", + "\n", + "Note: With DTensor, the input Tensor 'x' always represents the global batch. Therefore `tf.nn.batch_normalization` is applied to the global batch. 
This differs from training with `tf.distribute.MirroredStrategy`, where Tensor 'x' only represents the per-replica shard of the batch (the local batch)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "riBA9pfhlPFq" + }, + "outputs": [], + "source": [ + "class BatchNorm(tf.Module):\n", + "\n", + " def __init__(self):\n", + " super().__init__()\n", + "\n", + " def __call__(self, x, training=True):\n", + " if not training:\n", + " # This branch is not used in the Tutorial.\n", + " pass\n", + " mean, variance = tf.nn.moments(x, axes=[0])\n", + " return tf.nn.batch_normalization(x, mean, variance, 0.0, 1.0, 1e-5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q4R4MPz5prh4" + }, + "source": [ + "A full featured batch normalization layer (such as `tf.keras.layers.BatchNormalization`) will need Layout arguments for its variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "unFcP99zprJj" + }, + "outputs": [], + "source": [ + "def make_keras_bn(bn_layout):\n", + " return tf.keras.layers.BatchNormalization(gamma_layout=bn_layout,\n", + " beta_layout=bn_layout,\n", + " moving_mean_layout=bn_layout,\n", + " moving_variance_layout=bn_layout,\n", + " fused=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v8Dj7AJ_lPs0" + }, + "source": [ + "### Putting Layers Together\n", + "\n", + "Next, build a Multi-layer perceptron (MLP) network with the building blocks above. The diagram below shows the axis relationships between the input `x` and the weight matrices for the two `Dense` layers without any DTensor sharding or replication applied." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "udFGAO-NrZw6" + }, + "source": [ + "\"The\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8DCQ0aQ5rQtB" + }, + "source": [ + "The output of the first `Dense` layer is passed into the input of the second `Dense` layer (after the `BatchNorm`). Therefore, the preferred DTensor sharding for the output of first `Dense` layer ($\\mathbf{W_1}$) and the input of second `Dense` layer ($\\mathbf{W_2}$) is to shard $\\mathbf{W_1}$ and $\\mathbf{W_2}$ the same way along the common axis $\\hat{j}$,\n", + "\n", + "$$\n", + "\\mathsf{Layout}[{W_{1,ij}}; i, j] = \\left[\\hat{i}, \\hat{j}\\right] \\\\\n", + "\\mathsf{Layout}[{W_{2,jk}}; j, k] = \\left[\\hat{j}, \\hat{k} \\right]\n", + "$$\n", + "\n", + "Even though the layout deduction shows that the 2 layouts are not independent, for the sake of simplicity of the model interface, `MLP` will take 2 `Layout` arguments, one per Dense layer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "junyS-965opl" + }, + "outputs": [], + "source": [ + "from typing import Tuple\n", + "\n", + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, dense_layouts: Tuple[dtensor.Layout, dtensor.Layout]):\n", + " super().__init__()\n", + "\n", + " self.dense1 = Dense(\n", + " 1200, 48, (1, 2), dense_layouts[0], activation=tf.nn.relu)\n", + " self.bn = BatchNorm()\n", + " self.dense2 = Dense(48, 2, (3, 4), dense_layouts[1])\n", + "\n", + " def __call__(self, x):\n", + " y = x\n", + " y = self.dense1(y)\n", + " y = self.bn(y)\n", + " y = self.dense2(y)\n", + " return y\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dgLmebHhr7h" + }, + "source": [ + "The trade-off between correctness in layout deduction constraints and simplicity of API is a common design point of APIs that uses DTensor.\n", + "It is also possible to capture the dependency between `Layout`'s with a different API. For example, the `MLPStricter` class creates the `Layout` objects in the constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wEZR7UlihsYX" + }, + "outputs": [], + "source": [ + "class MLPStricter(tf.Module):\n", + "\n", + " def __init__(self, mesh, input_mesh_dim, inner_mesh_dim1, output_mesh_dim):\n", + " super().__init__()\n", + "\n", + " self.dense1 = Dense(\n", + " 1200, 48, (1, 2), dtensor.Layout([input_mesh_dim, inner_mesh_dim1], mesh),\n", + " activation=tf.nn.relu)\n", + " self.bn = BatchNorm()\n", + " self.dense2 = Dense(48, 2, (3, 4), dtensor.Layout([inner_mesh_dim1, output_mesh_dim], mesh))\n", + "\n", + "\n", + " def __call__(self, x):\n", + " y = x\n", + " y = self.dense1(y)\n", + " y = self.bn(y)\n", + " y = self.dense2(y)\n", + " return y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcQi7D5mal2L" + }, + "source": [ + "To make sure the model runs, probe your model with fully replicated layouts and a fully replicated batch of `'x'` input." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zOPuYeQwallh" + }, + "outputs": [], + "source": [ + "WORLD = dtensor.create_mesh([(\"world\", 8)], devices=DEVICES)\n", + "\n", + "model = MLP([dtensor.Layout.replicated(WORLD, rank=2),\n", + " dtensor.Layout.replicated(WORLD, rank=2)])\n", + "\n", + "sample_x, sample_y = train_data_vec.take(1).get_single_element()\n", + "sample_x = dtensor.copy_to_mesh(sample_x, dtensor.Layout.replicated(WORLD, rank=2))\n", + "print(model(sample_x))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "akrjDstEpDv9" + }, + "source": [ + "## Moving data to the device\n", + "\n", + "Usually, `tf.data` iterators (and other data fetching methods) yield tensor objects backed by the local host device memory. This data must be transferred to the accelerator device memory that backs DTensor's component tensors.\n", + "\n", + "`dtensor.copy_to_mesh` is unsuitable for this situation because it replicates input tensors to all devices due to DTensor's global perspective. So in this tutorial, you will use a helper function `repack_local_tensor`, to facilitate the transfer of data. This helper function uses `dtensor.pack` to send (and only send) the shard of the global batch that is intended for a replica to the device backing the replica.\n", + "\n", + "This simplified function assumes single-client. 
Determining the correct way to split the local tensor and the mapping between the pieces of the split and the local devices can be laborious in a multi-client application.\n", + "\n", + "Additional DTensor APIs to simplify `tf.data` integration are planned, supporting both single-client and multi-client applications. Please stay tuned." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3t5WvQR4Hvo4" + }, + "outputs": [], + "source": [ + "def repack_local_tensor(x, layout):\n", + "  \"\"\"Repacks a local Tensor-like to a DTensor with layout.\n", + "\n", + "  This function assumes a single-client application.\n", + "  \"\"\"\n", + "  x = tf.convert_to_tensor(x)\n", + "  sharded_dims = []\n", + "\n", + "  # For every sharded dimension, use tf.split to split the tensor along the dimension.\n", + "  # The result is a nested list of split-tensors in queue[0].\n", + "  queue = [x]\n", + "  for axis, dim in enumerate(layout.sharding_specs):\n", + "    if dim == dtensor.UNSHARDED:\n", + "      continue\n", + "    num_splits = layout.shape[axis]\n", + "    queue = tf.nest.map_structure(lambda x: tf.split(x, num_splits, axis=axis), queue)\n", + "    sharded_dims.append(dim)\n", + "\n", + "  # Now we can build the list of component tensors by looking up the location in\n", + "  # the nested list of split-tensors created in queue[0].\n", + "  components = []\n", + "  for locations in layout.mesh.local_device_locations():\n", + "    t = queue[0]\n", + "    for dim in sharded_dims:\n", + "      split_index = locations[dim]  # Only valid on single-client mesh.\n", + "      t = t[split_index]\n", + "    components.append(t)\n", + "\n", + "  return dtensor.pack(components, layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2KKCDcjG7zj2" + }, + "source": [ + "## Data parallel training\n", + "\n", + "In this section, you will train your MLP model with data parallel training. The following sections will demonstrate model parallel training and spatial parallel training.\n", + "\n", + "Data parallel training is a commonly used scheme for distributed machine learning:\n", + "\n", + " - Model variables are replicated on each of the N devices.\n", + " - A global batch is split into N per-replica batches.\n", + " - Each per-replica batch is trained on the replica device.\n", + " - Gradients are reduced across replicas before the weight update is collectively applied on all replicas.\n", + "\n", + "Data parallel training provides nearly linear speedup with respect to the number of devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UMsLUyTGq3oL" + }, + "source": [ + "### Creating a data parallel mesh\n", + "\n", + "A typical data parallelism training loop uses a DTensor `Mesh` that consists of a single `batch` dimension, where each device becomes a replica that receives a shard from the global batch.\n", + "\n", + "\"Data\n", + "\n", + "\n", + "The replicated model runs on the replica, therefore the model variables are fully replicated (unsharded)."
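To build intuition for how a global batch lands on such a mesh, here is a small, self-contained sketch (not part of the original notebook). It assumes the 8 virtual CPUs and the `DEVICES` list configured earlier; the `demo_*` names are illustrative only, and the actual training mesh is created in the next cell:

```python
import tensorflow as tf
from tensorflow.experimental import dtensor

# A throwaway 1-D 'batch' mesh and a batch-sharded layout for rank-2 data.
demo_mesh = dtensor.create_mesh([("batch", 8)], devices=DEVICES)
demo_layout = dtensor.Layout.batch_sharded(demo_mesh, 'batch', rank=2)

# A toy "global batch" of 8 examples with 2 features each.
global_batch = tf.reshape(tf.range(16, dtype=tf.float32), [8, 2])

# Split the host tensor into one component per local device, then pack.
components = tf.split(global_batch, demo_mesh.num_local_devices())
demo_batch = dtensor.pack(components, demo_layout)

print(dtensor.fetch_layout(demo_batch))
# Each device holds a single-row shard of the global batch.
for i, shard in enumerate(dtensor.unpack(demo_batch)):
  print(f'component {i}:', shard.numpy())
```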
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C0IyOlxmeu4I" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=DEVICES)\n", + "\n", + "model = MLP([dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh),\n", + " dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh),])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OREKwBybo1gZ" + }, + "source": [ + "### Packing training data to DTensors\n", + "\n", + "The training data batch should be packed into DTensors sharded along the `'batch'`(first) axis, such that DTensor will evenly distribute the training data to the `'batch'` mesh dimension.\n", + "\n", + "**Note**: In DTensor, the `batch size` always refers to the global batch size. The batch size should be chosen such that it can be divided evenly by the size of the `batch` mesh dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8xMYkTpGocY8" + }, + "outputs": [], + "source": [ + "def repack_batch(x, y, mesh):\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y\n", + "\n", + "sample_x, sample_y = train_data_vec.take(1).get_single_element()\n", + "sample_x, sample_y = repack_batch(sample_x, sample_y, mesh)\n", + "\n", + "print('x', sample_x[:, 0])\n", + "print('y', sample_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uONSiqOIkFL1" + }, + "source": [ + "### Training step\n", + "\n", + "This example uses a Stochastic Gradient Descent optimizer with the Custom Training Loop (CTL). Consult the [Custom Training Loop guide](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Walk through](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough) for more information on those topics.\n", + "\n", + "The `train_step` is encapsulated as a `tf.function` to indicate this body is to be traced as a TensorFlow Graph. The body of `train_step` consists of a forward inference pass, a backward gradient pass, and the variable update.\n", + "\n", + "Note that the body of `train_step` does not contain any special DTensor annotations. Instead, `train_step` only contains high-level TensorFlow operations that process the input `x` and `y` from the global view of the input batch and the model. All of the DTensor annotations (`Mesh`, `Layout`) are factored out of the train step." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BwUFzLGDtQT6" + }, + "outputs": [], + "source": [ + "# Refer to the CTL (custom training loop guide)\n", + "@tf.function\n", + "def train_step(model, x, y, learning_rate=tf.constant(1e-4)):\n", + " with tf.GradientTape() as tape:\n", + " logits = model(x)\n", + " # tf.reduce_sum sums the batch sharded per-example loss to a replicated\n", + " # global loss (scalar).\n", + " loss = tf.reduce_sum(\n", + " tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + " logits=logits, labels=y))\n", + " parameters = model.trainable_variables\n", + " gradients = tape.gradient(loss, parameters)\n", + " for parameter, parameter_gradient in zip(parameters, gradients):\n", + " parameter.assign_sub(learning_rate * parameter_gradient)\n", + "\n", + " # Define some metrics\n", + " accuracy = 1.0 - tf.reduce_sum(tf.cast(tf.argmax(logits, axis=-1, output_type=tf.int64) != y, tf.float32)) / x.shape[0]\n", + " loss_per_sample = loss / len(x)\n", + " return {'loss': loss_per_sample, 'accuracy': accuracy}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0OYTu4j0evWT" + }, + "source": [ + "### Checkpointing\n", + "\n", + "You can checkpoint a DTensor model using `tf.train.Checkpoint` out of the box. Saving and restoring sharded DVariables will perform an efficient sharded save and restore. Currently, when using `tf.train.Checkpoint.save` and `tf.train.Checkpoint.restore`, all DVariables must be on the same host mesh, and DVariables and regular variables cannot be saved together. You can learn more about checkpointing in [this guide](../../guide/checkpoint.ipynb).\n", + "\n", + "When a DTensor checkpoint is restored, `Layout`s of variables can be different from when the checkpoint is saved. That is, saving DTensor models is layout- and mesh-agnostic, and only affects the efficiency of sharded saving. You can save a DTensor model with one mesh and layout and restore it on a different mesh and layout. This tutorial makes use of this feature to continue the training in the Model Parallel training and Spatial Parallel training sections.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rsInFFJg7x9t" + }, + "outputs": [], + "source": [ + "CHECKPOINT_DIR = tempfile.mkdtemp()\n", + "\n", + "def start_checkpoint_manager(model):\n", + " ckpt = tf.train.Checkpoint(root=model)\n", + " manager = tf.train.CheckpointManager(ckpt, CHECKPOINT_DIR, max_to_keep=3)\n", + "\n", + " if manager.latest_checkpoint:\n", + " print(\"Restoring a checkpoint\")\n", + " ckpt.restore(manager.latest_checkpoint).assert_consumed()\n", + " else:\n", + " print(\"New training\")\n", + " return manager\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9r77ky5Jgp1j" + }, + "source": [ + "### Training loop\n", + "\n", + "For the data parallel training scheme, train for epochs and report the progress. 3 epochs is insufficient for training the model -- an accuracy of 50% is as good as randomly guessing.\n", + "\n", + "Enable checkpointing so that you can pick up the training later. In the following section, you will load the checkpoint and train with a different parallel scheme." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UaLn-vGZgqbS" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "manager = start_checkpoint_manager(model)\n", + "\n", + "for epoch in range(num_epochs):\n", + " step = 0\n", + " pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()), stateful_metrics=[])\n", + " metrics = {'epoch': epoch}\n", + " for x,y in train_data_vec:\n", + "\n", + " x, y = repack_batch(x, y, mesh)\n", + "\n", + " metrics.update(train_step(model, x, y, 1e-2))\n", + "\n", + " pbar.update(step, values=metrics.items(), finalize=False)\n", + " step += 1\n", + " manager.save()\n", + " pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YRFJEhum7EGD" + }, + "source": [ + "## Model Parallel Training\n", + "\n", + "If you switch to a 2 dimensional `Mesh`, and shard the model variables along the second mesh dimension, then the training becomes Model Parallel.\n", + "\n", + "In Model Parallel training, each model replica spans multiple devices (2 in this case):\n", + "\n", + "- There are 4 model replicas, and the training data batch is distributed to the 4 replicas.\n", + "- The 2 devices within a single model replica receive replicated training data.\n", + "\n", + "\n", + "\"Model\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5gZE9IT5Dzwl" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 4), (\"model\", 2)], devices=DEVICES)\n", + "model = MLP([dtensor.Layout([dtensor.UNSHARDED, \"model\"], mesh), \n", + " dtensor.Layout([\"model\", dtensor.UNSHARDED], mesh)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ihof3DkMFKnf" + }, + "source": [ + "As the training data is still sharded along the batch dimension, you can reuse the same `repack_batch` function as the Data Parallel training case. DTensor will automatically replicate the per-replica batch to all devices inside the replica along the `\"model\"` mesh dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dZf56ynbE_p1" + }, + "outputs": [], + "source": [ + "def repack_batch(x, y, mesh):\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UW3OXdhNFfpv" + }, + "source": [ + "Next run the training loop. The training loop reuses the same checkpoint manager as the Data Parallel training example, and the code looks identical.\n", + "\n", + "You can continue training the data parallel trained model under model parallel training." 
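Before running the loop, you can optionally confirm how the weights are now sharded. This is a small illustrative sketch (not part of the original notebook), assuming the model-parallel `model` created in the previous cell:

```python
# The Dense kernels should now be sharded along the 'model' mesh dimension,
# while the input batch remains sharded along 'batch'.
print(model.dense1.weight.layout)
print(model.dense2.weight.layout)
```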
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LLC0wgii7EgA" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "manager = start_checkpoint_manager(model)\n", + "\n", + "for epoch in range(num_epochs):\n", + "  step = 0\n", + "  pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()))\n", + "  metrics = {'epoch': epoch}\n", + "  for x,y in train_data_vec:\n", + "    x, y = repack_batch(x, y, mesh)\n", + "    metrics.update(train_step(model, x, y, 1e-2))\n", + "    pbar.update(step, values=metrics.items(), finalize=False)\n", + "    step += 1\n", + "  manager.save()\n", + "  pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BZH-aMrVzi2L" + }, + "source": [ + "## Spatial Parallel Training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u-bK6IZ9GCS9" + }, + "source": [ + "When training with data of very high dimensionality (e.g. a very large image or a video), it may be desirable to shard along the feature dimension. This is called [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus), which was first introduced into TensorFlow for training models with large 3-d input samples.\n", + "\n", + "\"Spatial\n", + "\n", + "DTensor also supports this case. The only change you need to make is to create a Mesh that includes a `feature` dimension, and apply the corresponding `Layout`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jpc9mqURGpmK" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 2), (\"feature\", 2), (\"model\", 2)], devices=DEVICES)\n", + "model = MLP([dtensor.Layout([\"feature\", \"model\"], mesh), \n", + "             dtensor.Layout([\"model\", dtensor.UNSHARDED], mesh)])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i07Wrv-jHBc1" + }, + "source": [ + "Shard the input data along the `feature` dimension when packing the input tensors to DTensors. You do this with a slightly different repack function, `repack_batch_for_spt`, where `spt` stands for Spatial Parallel Training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DWR8qF6BGtFL" + }, + "outputs": [], + "source": [ + "def repack_batch_for_spt(x, y, mesh):\n", + "    # Shard data on the feature dimension, too.\n", + "    x = repack_local_tensor(x, layout=dtensor.Layout([\"batch\", 'feature'], mesh))\n", + "    y = repack_local_tensor(y, layout=dtensor.Layout([\"batch\"], mesh))\n", + "    return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ygl9dqMUHTVN" + }, + "source": [ + "Spatial parallel training can also continue from a checkpoint created with other parallel training schemes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p3NnpHSKo-hx" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "\n", + "manager = start_checkpoint_manager(model)\n", + "for epoch in range(num_epochs):\n", + " step = 0\n", + " metrics = {'epoch': epoch}\n", + " pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()))\n", + "\n", + " for x, y in train_data_vec:\n", + " x, y = repack_batch_for_spt(x, y, mesh)\n", + " metrics.update(train_step(model, x, y, 1e-2))\n", + "\n", + " pbar.update(step, values=metrics.items(), finalize=False)\n", + " step += 1\n", + " manager.save()\n", + " pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vp4L59CpJjYr" + }, + "source": [ + "## SavedModel and DTensor\n", + "\n", + "The integration of DTensor and SavedModel is still under development. \n", + "\n", + "As of TensorFlow `2.11`, `tf.saved_model` can save sharded and replicated DTensor models, and saving will do an efficient sharded save on different devices of the mesh. However, after a model is saved, all DTensor annotations are lost and the saved signatures can only be used with regular Tensors, not DTensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "49HfIq_SJZoj" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"world\", 1)], devices=DEVICES[:1])\n", + "mlp = MLP([dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh), \n", + " dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)])\n", + "\n", + "manager = start_checkpoint_manager(mlp)\n", + "\n", + "model_for_saving = tf.keras.Sequential([\n", + " text_vectorization,\n", + " mlp\n", + "])\n", + "\n", + "@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n", + "def run(inputs):\n", + " return {'result': model_for_saving(inputs)}\n", + "\n", + "tf.saved_model.save(\n", + " model_for_saving, \"/tmp/saved_model\",\n", + " signatures=run)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h6Csim_VMGxQ" + }, + "source": [ + "As of TensorFlow 2.9.0, you can only call a loaded signature with a regular Tensor, or a fully replicated DTensor (which will be converted to a regular Tensor)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HG_ASSzR4IWW" + }, + "outputs": [], + "source": [ + "sample_batch = train_data.take(1).get_single_element()\n", + "sample_batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qW8yKPrhKQ5b" + }, + "outputs": [], + "source": [ + "loaded = tf.saved_model.load(\"/tmp/saved_model\")\n", + "\n", + "run_sig = loaded.signatures[\"serving_default\"]\n", + "result = run_sig(sample_batch['text'])['result']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GahGbv0ZmkJb" + }, + "outputs": [], + "source": [ + "np.mean(tf.argmax(result, axis=-1) == sample_batch['label'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ks-Vs9qsH6jO" + }, + "source": [ + "## What's next?\n", + "\n", + "This tutorial demonstrated building and training an MLP sentiment analysis model with DTensor.\n", + "\n", + "Through `Mesh` and `Layout` primitives, DTensor can transform a TensorFlow `tf.function` to a distributed program suitable for a variety of training schemes.\n", + "\n", + "In a real-world machine learning application, evaluation and cross-validation should be applied to avoid producing an over-fitted model. The techniques introduced in this tutorial can also be applied to introduce parallelism to evaluation.\n", + "\n", + "Composing a model with `tf.Module` from scratch is a lot of work, and reusing existing building blocks such as layers and helper functions can drastically speed up model development.\n", + "As of TensorFlow 2.9, all Keras Layers under `tf.keras.layers` accepts DTensor layouts as their arguments, and can be used to build DTensor models. You can even directly reuse a Keras model with DTensor without modifying the model implementation. Refer to the [DTensor Keras Integration Tutorial](https://www.tensorflow.org/tutorials/distribute/dtensor_keras_tutorial) for information on using DTensor Keras. " + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "dtensor_ml_tutorial.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/distribute/input.ipynb b/site/en/tutorials/distribute/input.ipynb index e1cdca6788e..f779c4f19a6 100644 --- a/site/en/tutorials/distribute/input.ipynb +++ b/site/en/tutorials/distribute/input.ipynb @@ -73,7 +73,7 @@ "This guide will show you the different ways in which you can create distributed dataset and iterators using `tf.distribute` APIs. Additionally, the following topics will be covered:\n", "- Usage, sharding and batching options when using `tf.distribute.Strategy.experimental_distribute_dataset` and `tf.distribute.Strategy.distribute_datasets_from_function`.\n", "- Different ways in which you can iterate over the distributed dataset.\n", - "- Differences between `tf.distribute.Strategy.experimental_distribute_dataset`/`tf.distribute.Strategy.distribute_datasets_from_function` APIs and `tf.data` APIs as well any limitations that users may come across in their usage.\n", + "- Differences between `tf.distribute.Strategy.experimental_distribute_dataset`/`tf.distribute.Strategy.distribute_datasets_from_function` APIs and `tf.data` APIs as well as any limitations that users may come across in their usage.\n", "\n", "This guide does not cover usage of distributed input with Keras APIs." 
] @@ -84,7 +84,7 @@ "id": "MM6W__qraV55" }, "source": [ - "## Distributed Datasets" + "## Distributed datasets" ] }, { @@ -93,8 +93,8 @@ "id": "lNy9GxjSlMKQ" }, "source": [ - "To use `tf.distribute` APIs to scale, it is recommended that users use `tf.data.Dataset` to represent their input. `tf.distribute` has been made to work efficiently with `tf.data.Dataset` (for example, automatic prefetch of data onto each accelerator device) with performance optimizations being regularly incorporated into the implementation. If you have a use case for using something other than `tf.data.Dataset`, please refer a later [section](\"tensorinputs\") in this guide.\n", - "In a non distributed training loop, users first create a `tf.data.Dataset` instance and then iterate over the elements. For example:\n" + "To use `tf.distribute` APIs to scale, use `tf.data.Dataset` to represent their input. `tf.distribute` works efficiently with `tf.data.Dataset`—for example, via automatic prefetching onto each accelerator device and regular performance updates. If you have a use case for using something other than `tf.data.Dataset`, please refer to the [Tensor inputs section](#tensorinputs) in this guide.\n", + "In a non-distributed training loop, first create a `tf.data.Dataset` instance and then iterate over the elements. For example:\n" ] }, { @@ -114,6 +114,34 @@ "print(tf.__version__)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6cnilUtmKwpa" + }, + "outputs": [], + "source": [ + "# Simulate multiple CPUs with virtual devices\n", + "N_VIRTUAL_DEVICES = 2\n", + "physical_devices = tf.config.list_physical_devices(\"CPU\")\n", + "tf.config.set_logical_device_configuration(\n", + " physical_devices[0], [tf.config.LogicalDeviceConfiguration() for _ in range(N_VIRTUAL_DEVICES)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zd4l1ySeLRk1" + }, + "outputs": [], + "source": [ + "print(\"Available devices:\")\n", + "for i, device in enumerate(tf.config.list_logical_devices()):\n", + " print(\"%d) %s\" % (i, device))" + ] + }, { "cell_type": "code", "execution_count": null, @@ -214,14 +242,14 @@ " * Replica 1:[0, 1]\n", " * Replica 2:[2, 3]\n", " * Batch 2:\n", - " * Replica 2: [4]\n", + " * Replica 1: [4]\n", " * Replica 2: [5]\n", "\n", "\n", "\n", "* `tf.data.Dataset.range(4).batch(4)`\n", " * Without distribution:\n", - " * Batch 1: [[0], [1], [2], [3]]\n", + " * Batch 1: [0, 1, 2, 3]\n", " * With distribution over 5 replicas:\n", " * Batch 1:\n", " * Replica 1: [0]\n", @@ -246,7 +274,7 @@ "\n", "Note: The above examples only illustrate how a global batch is split on different replicas. It is not advisable to depend on the actual values that might end up on each replica as it can change depending on the implementation.\n", "\n", - "Rebatching the dataset has a space complexity that increases linearly with the number of replicas. This means that for the multi worker training use case the input pipeline can run into OOM errors. " + "Rebatching the dataset has a space complexity that increases linearly with the number of replicas. This means that for the multi-worker training use case the input pipeline can run into OOM errors. " ] }, { @@ -257,7 +285,7 @@ "source": [ "##### Sharding\n", "\n", - "`tf.distribute` also autoshards the input dataset in multi worker training with `MultiWorkerMirroredStrategy` and `TPUStrategy`. Each dataset is created on the CPU device of the worker. 
Autosharding a dataset over a set of workers means that each worker is assigned a subset of the entire dataset (if the right `tf.data.experimental.AutoShardPolicy` is set). This is to ensure that at each step, a global batch size of non overlapping dataset elements will be processed by each worker. Autosharding has a couple of different options that can be specified using `tf.data.experimental.DistributeOptions`. Note that there is no autosharding in multi worker training with `ParameterServerStrategy`, and more information on dataset creation with this strategy can be found in the [Parameter Server Strategy tutorial](parameter_server_training.ipynb). " + "`tf.distribute` also autoshards the input dataset in multi-worker training with `MultiWorkerMirroredStrategy` and `TPUStrategy`. Each dataset is created on the CPU device of the worker. Autosharding a dataset over a set of workers means that each worker is assigned a subset of the entire dataset (if the right `tf.data.experimental.AutoShardPolicy` is set). This is to ensure that at each step, a global batch size of non-overlapping dataset elements will be processed by each worker. Autosharding has a couple of different options that can be specified using `tf.data.experimental.DistributeOptions`. Note that there is no autosharding in multi-worker training with `ParameterServerStrategy`, and more information on dataset creation with this strategy can be found in the [ParameterServerStrategy tutorial](parameter_server_training.ipynb). " ] }, { @@ -268,7 +296,7 @@ }, "outputs": [], "source": [ - "dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(64).batch(16)\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(64).batch(16)\n", "options = tf.data.Options()\n", "options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA\n", "dataset = dataset.with_options(options)" @@ -358,7 +386,7 @@ "source": [ "#### Usage\n", "\n", - "This API takes an input function and returns a `tf.distribute.DistributedDataset` instance. The input function that users pass in has a `tf.distribute.InputContext` argument and should return a `tf.data.Dataset` instance. With this API, `tf.distribute` does not make any further changes to the user’s `tf.data.Dataset` instance returned from the input function. It is the responsibility of the user to batch and shard the dataset. `tf.distribute` calls the input function on the CPU device of each of the workers. Apart from allowing users to specify their own batching and sharding logic, this API also demonstrates better scalability and performance compared to `tf.distribute.Strategy.experimental_distribute_dataset` when used for multi worker training." + "This API takes an input function and returns a `tf.distribute.DistributedDataset` instance. The input function that users pass in has a `tf.distribute.InputContext` argument and should return a `tf.data.Dataset` instance. With this API, `tf.distribute` does not make any further changes to the user’s `tf.data.Dataset` instance returned from the input function. It is the responsibility of the user to batch and shard the dataset. `tf.distribute` calls the input function on the CPU device of each of the workers. Apart from allowing users to specify their own batching and sharding logic, this API also demonstrates better scalability and performance compared to `tf.distribute.Strategy.experimental_distribute_dataset` when used for multi-worker training." 
] }, { @@ -373,11 +401,11 @@ "\n", "def dataset_fn(input_context):\n", " batch_size = input_context.get_per_replica_batch_size(global_batch_size)\n", - " dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(64).batch(16)\n", + " dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(64).batch(16)\n", " dataset = dataset.shard(\n", - " input_context.num_input_pipelines, input_context.input_pipeline_id)\n", + " input_context.num_input_pipelines, input_context.input_pipeline_id)\n", " dataset = dataset.batch(batch_size)\n", - " dataset = dataset.prefetch(2) # This prefetches 2 batches per device.\n", + " dataset = dataset.prefetch(2) # This prefetches 2 batches per device.\n", " return dataset\n", "\n", "dist_dataset = mirrored_strategy.distribute_datasets_from_function(dataset_fn)" @@ -411,7 +439,7 @@ "source": [ "##### Sharding\n", "\n", - "The `tf.distribute.InputContext` object that is implicitly passed as an argument to the user’s input function is created by `tf.distribute` under the hood. It has information about the number of workers, current worker id etc. This input function can handle sharding as per policies set by the user using these properties that are part of the `tf.distribute.InputContext` object.\n" + "The `tf.distribute.InputContext` object that is implicitly passed as an argument to the user’s input function is created by `tf.distribute` under the hood. It has information about the number of workers, current worker ID etc. This input function can handle sharding as per policies set by the user using these properties that are part of the `tf.distribute.InputContext` object.\n" ] }, { @@ -422,7 +450,7 @@ "source": [ "##### Prefetching\n", "\n", - "`tf.distribute` does not add a prefetch transformation at the end of the `tf.data.Dataset` returned by the user provided input function." + "`tf.distribute` does not add a prefetch transformation at the end of the `tf.data.Dataset` returned by the user-provided input function, so you explicitly call `Dataset.prefetch` in the example above." ] }, { @@ -442,7 +470,7 @@ "id": "dL3XbI1gzEjO" }, "source": [ - "## Distributed Iterators" + "## Distributed iterators" ] }, { @@ -452,7 +480,7 @@ }, "source": [ "Similar to non-distributed `tf.data.Dataset` instances, you will need to create an iterator on the `tf.distribute.DistributedDataset` instances to iterate over it and access the elements in the `tf.distribute.DistributedDataset`.\n", - "The following are the ways in which you can create an `tf.distribute.DistributedIterator` and use it to train your model:\n" + "The following are the ways in which you can create a `tf.distribute.DistributedIterator` and use it to train your model:\n" ] }, { @@ -486,7 +514,7 @@ "global_batch_size = 16\n", "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", - "dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(100).batch(global_batch_size)\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(global_batch_size)\n", "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)\n", "\n", "@tf.function\n", @@ -536,14 +564,16 @@ "id": "UpJXIlxjqPYg" }, "source": [ - "With `next()` or `tf.distribute.DistributedIterator.get_next()`, if the `tf.distribute.DistributedIterator` has reached its end, an OutOfRange error will be thrown. The client can catch the error on python side and continue doing other work such as checkpointing and evaluation. 
However, this will not work if you are using a host training loop (i.e., run multiple steps per `tf.function`), which looks like:\n",
+ "With `next` or `tf.distribute.DistributedIterator.get_next`, if the `tf.distribute.DistributedIterator` has reached its end, an OutOfRange error will be thrown. The client can catch the error on the Python side and continue doing other work such as checkpointing and evaluation. However, this will not work if you are using a host training loop (i.e., run multiple steps per `tf.function`), which looks like:\n",
+ "\n",
+ "```\n",
 "@tf.function\n",
 "def train_fn(iterator):\n",
 "  for _ in tf.range(steps_per_loop):\n",
 "    strategy.run(step_fn, args=(next(iterator),))\n",
 "```\n",
- " `train_fn` contains multiple steps by wrapping the step body inside a `tf.range`. In this case, different iterations in the loop with no dependency could start in parallel, so an OutOfRange error can be triggered in later iterations before the computation of previous iterations finishes. Once an OutOfRange error is thrown, all the ops in the function will be terminated right away. If this is some case that you would like to avoid, an alternative that does not throw an OutOfRange error is `tf.distribute.DistributedIterator.get_next_as_optional()`. `get_next_as_optional` returns a `tf.experimental.Optional` which contains the next element or no value if the `tf.distribute.DistributedIterator` has reached to an end."
+ "\n",
+ "This example `train_fn` contains multiple steps by wrapping the step body inside a `tf.range`. In this case, different iterations in the loop with no dependency could start in parallel, so an OutOfRange error can be triggered in later iterations before the computation of previous iterations finishes. Once an OutOfRange error is thrown, all the ops in the function will be terminated right away. If this is a case that you would like to avoid, an alternative that does not throw an OutOfRange error is `tf.distribute.DistributedIterator.get_next_as_optional`. `get_next_as_optional` returns a `tf.experimental.Optional` which contains the next element or no value if the `tf.distribute.DistributedIterator` has reached its end."
] }, { @@ -554,10 +584,10 @@ }, "outputs": [], "source": [ - "# You can break the loop with get_next_as_optional by checking if the Optional contains value\n", + "# You can break the loop with `get_next_as_optional` by checking if the `Optional` contains a value\n", "global_batch_size = 4\n", "steps_per_loop = 5\n", - "strategy = tf.distribute.MirroredStrategy(devices=[\"GPU:0\", \"CPU:0\"])\n", + "strategy = tf.distribute.MirroredStrategy()\n", "\n", "dataset = tf.data.Dataset.range(9).batch(global_batch_size)\n", "distributed_iterator = iter(strategy.experimental_distribute_dataset(dataset))\n", @@ -568,7 +598,7 @@ " optional_data = distributed_iterator.get_next_as_optional()\n", " if not optional_data.has_value():\n", " break\n", - " per_replica_results = strategy.run(lambda x:x, args=(optional_data.get_value(),))\n", + " per_replica_results = strategy.run(lambda x: x, args=(optional_data.get_value(),))\n", " tf.print(strategy.experimental_local_results(per_replica_results))\n", "train_fn(distributed_iterator)" ] @@ -579,7 +609,7 @@ "id": "LaclbKnqzLjf" }, "source": [ - "## Using `element_spec` property" + "## Using the `element_spec` property" ] }, { @@ -588,7 +618,7 @@ "id": "Z1YvXqOpwy08" }, "source": [ - "If you pass the elements of a distributed dataset to a `tf.function` and want a `tf.TypeSpec` guarantee, you can specify the `input_signature` argument of the `tf.function`. The output of a distributed dataset is `tf.distribute.DistributedValues` which can represent the input to a single device or multiple devices. To get the `tf.TypeSpec` corresponding to this distributed value you can use the `element_spec` property of the distributed dataset or distributed iterator object." + "If you pass the elements of a distributed dataset to a `tf.function` and want a `tf.TypeSpec` guarantee, you can specify the `input_signature` argument of the `tf.function`. The output of a distributed dataset is `tf.distribute.DistributedValues` which can represent the input to a single device or multiple devices. To get the `tf.TypeSpec` corresponding to this distributed value, you can use `tf.distribute.DistributedDataset.element_spec` or `tf.distribute.DistributedIterator.element_spec`." ] }, { @@ -604,7 +634,7 @@ "steps_per_epoch = 5\n", "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", - "dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(100).batch(global_batch_size)\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(global_batch_size)\n", "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)\n", "\n", "@tf.function(input_signature=[dist_dataset.element_spec])\n", @@ -627,7 +657,246 @@ "id": "-OAa6svUzuWm" }, "source": [ - "## Partial Batches" + "## Data preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pSMrs3kJQexW" + }, + "source": [ + "So far, you have learned how to distribute a `tf.data.Dataset`. Yet before the data is ready for the model, it needs to be preprocessed, for example by cleansing, transforming, and augmenting it. Two sets of those handy tools are:\n", + "\n", + "* [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers): a set of Keras layers that allow developers to build Keras-native input processing pipelines. 
Some Keras preprocessing layers contain non-trainable states, which can be set on initialization or `adapt`ed (refer to the `adapt` section of the [Keras preprocessing layers guide](https://www.tensorflow.org/guide/keras/preprocessing_layers)). When distributing stateful preprocessing layers, the states should be replicated to all workers. To use these layers, you can either make them part of the model or apply them to the datasets.\n",
+ "\n",
+ "* [TensorFlow Transform (tf.Transform)](https://www.tensorflow.org/tfx/transform/get_started): a library for TensorFlow that allows you to define both instance-level and full-pass data transformation through data preprocessing pipelines. TensorFlow Transform has two phases. The first is the Analyze phase, where the raw training data is analyzed in a full-pass process to compute the statistics needed for the transformations, and the transformation logic is generated as instance-level operations. The second is the Transform phase, where the raw training data is transformed in an instance-level process.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Pd4aUCFdVlZ1"
+ },
+ "source": [
+ "### Keras preprocessing layers vs. TensorFlow Transform\n",
+ "\n",
+ "Both TensorFlow Transform and Keras preprocessing layers provide a way to split out preprocessing during training and bundle preprocessing with a model during inference, reducing train/serve skew.\n",
+ "\n",
+ "TensorFlow Transform, deeply integrated with [TFX](https://www.tensorflow.org/tfx), provides a scalable map-reduce solution to analyzing and transforming datasets of any size in a job separate from the training pipeline. If you need to run an analysis on a dataset that cannot fit on a single machine, TensorFlow Transform should be your first choice.\n",
+ "\n",
+ "Keras preprocessing layers are more geared towards preprocessing applied during training, after reading data from disk. They fit seamlessly with model development in the Keras library. They support analysis of a smaller dataset via [`adapt`](https://www.tensorflow.org/guide/keras/preprocessing_layers#the_adapt_method) and support use cases like image data augmentation, where each pass over the input dataset will yield different examples for training.\n",
+ "\n",
+ "The two libraries can also be mixed, where TensorFlow Transform is used for analysis and static transformations of input data, and Keras preprocessing layers are used for train-time transformations (e.g., one-hot encoding or data augmentation).\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MReKhhZpHUpj"
+ },
+ "source": [
+ "### Best practices with tf.distribute\n",
+ "\n",
+ "Working with both tools involves initializing the transformation logic to apply to data, which might create TensorFlow resources. These resources or states should be replicated to all workers to save inter-worker or worker-coordinator communication. To do so, it is recommended that you create Keras preprocessing layers, `tft.TFTransformOutput.transform_features_layer`, or `tft.TransformFeaturesLayer` under `tf.distribute.Strategy.scope`, just like you would for any other Keras layers.\n",
+ "\n",
+ "The following examples demonstrate usage of the `tf.distribute.Strategy` API with the high-level Keras `Model.fit` API and with a custom training loop separately."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rwEGMWuoX7kJ"
+ },
+ "source": [
+ "#### Extra notes for Keras preprocessing layers users\n",
+ "\n",
+ "**Preprocessing layers and large vocabularies**\n",
+ "\n",
+ "When dealing with large vocabularies (over one gigabyte) in a multi-worker setting (for example, `tf.distribute.MultiWorkerMirroredStrategy`, `tf.distribute.experimental.ParameterServerStrategy`, `tf.distribute.TPUStrategy`), it is recommended to save the vocabulary to a static file accessible from all workers (for example, with Cloud Storage). This will reduce the time spent replicating the vocabulary to all workers during training.\n",
+ "\n",
+ "**Preprocessing in the `tf.data` pipeline versus in the model**\n",
+ "\n",
+ "While Keras preprocessing layers can be applied either as part of the model or directly to a `tf.data.Dataset`, each option comes with its own advantages:\n",
+ "\n",
+ "* Applying the preprocessing layers within the model makes your model portable, and it helps reduce the training/serving skew. (For more details, refer to the _Benefits of doing preprocessing inside the model at inference time_ section in the [Working with preprocessing layers guide](https://www.tensorflow.org/guide/keras/preprocessing_layers#benefits_of_doing_preprocessing_inside_the_model_at_inference_time))\n",
+ "* Applying within the `tf.data` pipeline allows prefetching or offloading to the CPU, which generally gives better performance when using accelerators.\n",
+ "\n",
+ "When running on one or more TPUs, users should almost always place Keras preprocessing layers in the `tf.data` pipeline, as not all layers support TPUs, and string ops do not execute on TPUs. (The two exceptions are `tf.keras.layers.Normalization` and `tf.keras.layers.Rescaling`, which run fine on TPUs and are commonly used as the first layer in an image model.)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hNCYZ9L-BD2R"
+ },
+ "source": [
+ "### Preprocessing with `Model.fit`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NhRB2Xe8B6bX"
+ },
+ "source": [
+ "When using Keras `Model.fit`, you do not need to distribute the data yourself with either `tf.distribute.Strategy.experimental_distribute_dataset` or `tf.distribute.Strategy.distribute_datasets_from_function`. Check out the [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) guide and the [Distributed training with Keras](https://www.tensorflow.org/tutorials/distribute/keras) guide for details. A shortened example may look like this:\n",
+ "\n",
+ "```\n",
+ "strategy = tf.distribute.MirroredStrategy()\n",
+ "with strategy.scope():\n",
+ "  # Create the layer(s) under scope.\n",
+ "  integer_preprocessing_layer = tf.keras.layers.IntegerLookup(vocabulary=FILE_PATH)\n",
+ "  model = ...\n",
+ "  model.compile(...)\n",
+ "dataset = dataset.map(lambda x, y: (integer_preprocessing_layer(x), y))\n",
+ "model.fit(dataset)\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3zL2vzJ-G0yg"
+ },
+ "source": [
+ "Users of `tf.distribute.experimental.ParameterServerStrategy` with the `Model.fit` API need to use a `tf.keras.utils.experimental.DatasetCreator` as the input. (Refer to the [Parameter Server Training](https://www.tensorflow.org/tutorials/distribute/parameter_server_training#parameter_server_training_with_modelfit_api) guide for more details.)\n",
+ "\n",
+ "```\n",
+ "strategy = tf.distribute.experimental.ParameterServerStrategy(\n",
+ "    cluster_resolver,\n",
+ "    variable_partitioner=variable_partitioner)\n",
+ "\n",
+ "with strategy.scope():\n",
+ "  preprocessing_layer = tf.keras.layers.StringLookup(vocabulary=FILE_PATH)\n",
+ "  model = ...\n",
+ "  model.compile(...)\n",
+ "\n",
+ "def dataset_fn(input_context):\n",
+ "  ...\n",
+ "  dataset = dataset.map(preprocessing_layer)\n",
+ "  ...\n",
+ "  return dataset\n",
+ "\n",
+ "dataset_creator = tf.keras.utils.experimental.DatasetCreator(dataset_fn)\n",
+ "model.fit(dataset_creator, epochs=5, steps_per_epoch=20, callbacks=callbacks)\n",
+ "\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "imZLQUOYBJyW"
+ },
+ "source": [
+ "### Preprocessing with a custom training loop"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "r2PX1QH_OwU3"
+ },
+ "source": [
+ "When writing a [custom training loop](https://www.tensorflow.org/tutorials/distribute/custom_training), you will distribute your data with either the `tf.distribute.Strategy.experimental_distribute_dataset` API or the `tf.distribute.Strategy.distribute_datasets_from_function` API. If you distribute your dataset through `tf.distribute.Strategy.experimental_distribute_dataset`, applying these preprocessing APIs in your data pipeline will lead to the resources being automatically co-located with the data pipeline, avoiding remote resource access. Thus, the examples here will all use `tf.distribute.Strategy.distribute_datasets_from_function`, in which case it is crucial to place the initialization of these APIs under `strategy.scope()` for efficiency:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "wJS1UmcWQeab"
+ },
+ "outputs": [],
+ "source": [
+ "strategy = tf.distribute.MirroredStrategy()\n",
+ "vocab = [\"a\", \"b\", \"c\", \"d\", \"f\"]\n",
+ "\n",
+ "with strategy.scope():\n",
+ "  # Create the layer(s) under scope.\n",
+ "  layer = tf.keras.layers.StringLookup(vocabulary=vocab)\n",
+ "\n",
+ "def dataset_fn(input_context):\n",
+ "  # Create a tf.data.Dataset.\n",
+ "  dataset = tf.data.Dataset.from_tensor_slices([\"a\", \"c\", \"e\"]).repeat()\n",
+ "\n",
+ "  # Customize your batching, sharding, prefetching, etc.\n",
+ "  global_batch_size = 4\n",
+ "  batch_size = input_context.get_per_replica_batch_size(global_batch_size)\n",
+ "  dataset = dataset.batch(batch_size)\n",
+ "  dataset = dataset.shard(\n",
+ "      input_context.num_input_pipelines,\n",
+ "      input_context.input_pipeline_id)\n",
+ "\n",
+ "  # Apply the preprocessing layer(s) to the tf.data.Dataset.\n",
+ "  def preprocess_with_kpl(input):\n",
+ "    return layer(input)\n",
+ "\n",
+ "  processed_ds = dataset.map(preprocess_with_kpl)\n",
+ "  return processed_ds\n",
+ "\n",
+ "distributed_dataset = strategy.distribute_datasets_from_function(dataset_fn)\n",
+ "\n",
+ "# Print out a few example batches.\n",
+ "distributed_dataset_iterator = iter(distributed_dataset)\n",
+ "for _ in range(3):\n",
+ "  print(next(distributed_dataset_iterator))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PVl1cblWQy8b"
+ },
+ "source": [
+ "Note that if you are training with `tf.distribute.experimental.ParameterServerStrategy`, you'll also call `tf.distribute.experimental.coordinator.ClusterCoordinator.create_per_worker_dataset`:\n",
+ "\n",
+ "```\n",
+ "@tf.function\n",
"@tf.function\n", + "def per_worker_dataset_fn():\n", + " return strategy.distribute_datasets_from_function(dataset_fn)\n", + "\n", + "per_worker_dataset = coordinator.create_per_worker_dataset(per_worker_dataset_fn)\n", + "per_worker_iterator = iter(per_worker_dataset)\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ol7SmPID1dAt" + }, + "source": [ + "For Tensorflow Transform, as mentioned above, the Analyze stage is done separately from training and thus omitted here. See the [tutorial](https://www.tensorflow.org/tfx/tutorials/transform/census) for a detailed how-to. Usually, this stage includes creating a `tf.Transform` preprocessing function and transforming the data in an [Apache Beam](https://beam.apache.org/) pipeline with this preprocessing function. At the end of the Analyze stage, the output can be exported as a TensorFlow graph which you can use for both training and serving. Our example covers only the training pipeline part:\n", + "\n", + "```\n", + "with strategy.scope():\n", + " # working_dir contains the tf.Transform output.\n", + " tf_transform_output = tft.TFTransformOutput(working_dir)\n", + " # Loading from working_dir to create a Keras layer for applying the tf.Transform output to data\n", + " tft_layer = tf_transform_output.transform_features_layer()\n", + " ...\n", + "\n", + "def dataset_fn(input_context):\n", + " ...\n", + " dataset.map(tft_layer, num_parallel_calls=tf.data.AUTOTUNE)\n", + " ...\n", + " return dataset\n", + "\n", + "distributed_dataset = strategy.distribute_datasets_from_function(dataset_fn)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3_IQxRXxQWof" + }, + "source": [ + "## Partial batches" ] }, { @@ -636,7 +905,7 @@ "id": "hW2_gVkiztUG" }, "source": [ - "Partial batches are encountered when `tf.data.Dataset` instances that users create may contain batch sizes that are not evenly divisible by the number of replicas or when the cardinality of the dataset instance is not divisible by the batch size. This means that when the dataset is distributed over multiple replicas, the `next` call on some iterators will result in an OutOfRangeError. To handle this use case, `tf.distribute` returns dummy batches of batch size 0 on replicas that do not have any more data to process.\n" + "Partial batches are encountered when: 1) `tf.data.Dataset` instances that users create may contain batch sizes that are not evenly divisible by the number of replicas; or 2) when the cardinality of the dataset instance is not divisible by the batch size. This means that when the dataset is distributed over multiple replicas, the `next` call on some iterators will result in an `tf.errors.OutOfRangeError`. To handle this use case, `tf.distribute` returns dummy batches of batch size `0` on replicas that do not have any more data to process.\n" ] }, { @@ -645,9 +914,9 @@ "id": "rqutdpqtPcCH" }, "source": [ - "For the single worker case, if data is not returned by the `next` call on the iterator, dummy batches of 0 batch size are created and used along with the real data in the dataset. In the case of partial batches, the last global batch of data will contain real data alongside dummy batches of data. The stopping condition for processing data now checks if any of the replicas have data. 
If there is no data on any of the replicas, an OutOfRange error is thrown.\n", + "For the single-worker case, if the data is not returned by the `next` call on the iterator, dummy batches of 0 batch size are created and used along with the real data in the dataset. In the case of partial batches, the last global batch of data will contain real data alongside dummy batches of data. The stopping condition for processing data now checks if any of the replicas have data. If there is no data on any of the replicas, you will get a `tf.errors.OutOfRangeError`.\n", "\n", - "For the multi worker case, the boolean value representing presence of data on each of the workers is aggregated using cross replica communication and this is used to identify if all the workers have finished processing the distributed dataset. Since this involves cross worker communication there is some performance penalty involved.\n" + "For the multi-worker case, the boolean value representing presence of data on each of the workers is aggregated using cross replica communication and this is used to identify if all the workers have finished processing the distributed dataset. Since this involves cross worker communication there is some performance penalty involved.\n" ] }, { @@ -665,11 +934,11 @@ "id": "Nx4jyN_Az-Dy" }, "source": [ - "* When using `tf.distribute.Strategy.experimental_distribute_dataset` APIs with a multiple worker setup, users pass a `tf.data.Dataset` that reads from files. If the `tf.data.experimental.AutoShardPolicy` is set to `AUTO` or `FILE`, the actual per step batch size may be smaller than the user defined global batch size. This can happen when the remaining elements in the file are less than the global batch size. Users can either exhaust the dataset without depending on the number of steps to run or set `tf.data.experimental.AutoShardPolicy` to `DATA` to work around it.\n", + "* When using `tf.distribute.Strategy.experimental_distribute_dataset` APIs with a multi-worker setup, you pass a `tf.data.Dataset` that reads from files. If the `tf.data.experimental.AutoShardPolicy` is set to `AUTO` or `FILE`, the actual per-step batch size may be smaller than the one you defined for the global batch size. This can happen when the remaining elements in the file are less than the global batch size. You can either exhaust the dataset without depending on the number of steps to run, or set `tf.data.experimental.AutoShardPolicy` to `DATA` to work around it.\n", "\n", "* Stateful dataset transformations are currently not supported with `tf.distribute` and any stateful ops that the dataset may have are currently ignored. For example, if your dataset has a `map_fn` that uses `tf.random.uniform` to rotate an image, then you have a dataset graph that depends on state (i.e the random seed) on the local machine where the python process is being executed.\n", "\n", - "* Experimental `tf.data.experimental.OptimizationOptions` that are disabled by default can in certain contexts -- such as when used together with `tf.distribute` -- cause a performance degradation. You should only enable them after you validate that they benefit the performance of your workload in a distribute setting.\n", + "* Experimental `tf.data.experimental.OptimizationOptions` that are disabled by default can in certain contexts—such as when used together with `tf.distribute`—cause a performance degradation. 
You should only enable them after you validate that they benefit the performance of your workload in a distribute setting.\n", "\n", "* Please refer to [this guide](https://www.tensorflow.org/guide/data_performance) for how to optimize your input pipeline with `tf.data` in general. A few additional tips:\n", " * If you have multiple workers and are using `tf.data.Dataset.list_files` to create a dataset from all files matching one or more glob patterns, remember to set the `seed` argument or set `shuffle=False` so that each worker shard the file consistently.\n", @@ -695,7 +964,7 @@ "source": [ "* The order in which the data is processed by the workers when using `tf.distribute.experimental_distribute_dataset` or `tf.distribute.distribute_datasets_from_function` is not guaranteed. This is typically required if you are using `tf.distribute` to scale prediction. You can however insert an index for each element in the batch and order outputs accordingly. The following snippet is an example of how to order outputs.\n", "\n", - "Note: `tf.distribute.MirroredStrategy()` is used here for the sake of convenience. We only need to reorder inputs when we are using multiple workers and `tf.distribute.MirroredStrategy` is used to distribute training on a single worker." + "Note: `tf.distribute.MirroredStrategy` is used here for the sake of convenience. You only need to reorder inputs when you are using multiple workers, but `tf.distribute.MirroredStrategy` is used to distribute training on a single worker." ] }, { @@ -740,7 +1009,7 @@ }, "source": [ "\n", - "## How do I distribute my data if I am not using a canonical tf.data.Dataset instance?" + "## Tensor inputs instead of tf.data" ] }, { @@ -756,8 +1025,8 @@ "### Use experimental_distribute_values_from_function for arbitrary tensor inputs\n", "`strategy.run` accepts `tf.distribute.DistributedValues` which is the output of\n", "`next(iterator)`. To pass the tensor values, use\n", - "`experimental_distribute_values_from_function` to construct\n", - "`tf.distribute.DistributedValues` from raw tensors." + "`tf.distribute.Strategy.experimental_distribute_values_from_function` to construct\n", + "`tf.distribute.DistributedValues` from raw tensors. The user will have to specify their own batching and sharding logic in the input function with this option, which can be done using the `tf.distribute.experimental.ValueContext` input object." 
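+ "\n",
+ "For instance, a minimal sketch of such an input function (hypothetical, assuming a `global_batch` tensor and a `global_batch_size` value that are already defined in your program) could slice a raw tensor per replica:\n",
+ "\n",
+ "```\n",
+ "def value_fn(ctx):\n",
+ "  # Illustrative sharding only: slice the in-memory global batch by replica ID.\n",
+ "  per_replica_batch_size = global_batch_size // ctx.num_replicas_in_sync\n",
+ "  start = ctx.replica_id_in_sync_group * per_replica_batch_size\n",
+ "  return global_batch[start:start + per_replica_batch_size]\n",
+ "\n",
+ "distributed_values = mirrored_strategy.experimental_distribute_values_from_function(value_fn)\n",
+ "```"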
] }, { @@ -769,14 +1038,13 @@ "outputs": [], "source": [ "mirrored_strategy = tf.distribute.MirroredStrategy()\n", - "worker_devices = mirrored_strategy.extended.worker_devices\n", "\n", "def value_fn(ctx):\n", - " return tf.constant(1.0)\n", + " return tf.constant(ctx.replica_id_in_sync_group)\n", "\n", "distributed_values = mirrored_strategy.experimental_distribute_values_from_function(value_fn)\n", "for _ in range(4):\n", - " result = mirrored_strategy.run(lambda x:x, args=(distributed_values,))\n", + " result = mirrored_strategy.run(lambda x: x, args=(distributed_values,))\n", " print(result)" ] }, @@ -819,7 +1087,8 @@ "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)\n", "iterator = iter(dist_dataset)\n", "for _ in range(4):\n", - " mirrored_strategy.run(lambda x:x, args=(next(iterator),))" + " result = mirrored_strategy.run(lambda x: x, args=(next(iterator),))\n", + " print(result)" ] } ], @@ -827,8 +1096,7 @@ "colab": { "collapsed_sections": [], "name": "input.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/keras.ipynb b/site/en/tutorials/distribute/keras.ipynb index c75e5f88af5..b96656d4436 100644 --- a/site/en/tutorials/distribute/keras.ipynb +++ b/site/en/tutorials/distribute/keras.ipynb @@ -76,7 +76,7 @@ "\n", "You will use the `tf.keras` APIs to build the model and `Model.fit` for training it. (To learn about distributed training with a custom training loop and the `MirroredStrategy`, check out [this tutorial](custom_training.ipynb).)\n", "\n", - "`MirroredStrategy` trains your model on multiple GPUs on a single machine. For _synchronous training on many GPUs on multiple workers_, use the `tf.distribute.MultiWorkerMirroredStrategy` [with the Keras Model.fit](multi_worker_with_keras.ipynb) or [a custom training loop](multi_worker_with_ctl.ipynb). For other options, refer to the [Distributed training guide](../../guide/distributed_training.ipynb).\n", + "`MirroredStrategy` trains your model on multiple GPUs on a single machine. For _synchronous training on many GPUs on multiple workers_, use the `tf.distribute.MultiWorkerMirroredStrategy` with the [Keras Model.fit](multi_worker_with_keras.ipynb) or [a custom training loop](multi_worker_with_ctl.ipynb). For other options, refer to the [Distributed training guide](../../guide/distributed_training.ipynb).\n", "\n", "To learn about various other strategies, there is the [Distributed training with TensorFlow](../../guide/distributed_training.ipynb) guide." 
] @@ -280,7 +280,7 @@ "id": "4xsComp8Kz5H" }, "source": [ - "## Create the model" + "## Create the model and instantiate the optimizer" ] }, { @@ -289,7 +289,7 @@ "id": "1BnQYQTpB3YA" }, "source": [ - "Create and compile the Keras model in the context of `Strategy.scope`:" + "Within the context of `Strategy.scope`, create and compile the model using the Keras API:" ] }, { @@ -310,10 +310,21 @@ " ])\n", "\n", " model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " optimizer=tf.keras.optimizers.Adam(),\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),\n", " metrics=['accuracy'])" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "DCDKFcNJzdcd" + }, + "source": [ + "For this toy example with the MNIST dataset, you will be using the Adam optimizer's default learning rate of 0.001.\n", + "\n", + "For larger datasets, the key benefit of distributed training is to learn more in each training step, because each step processes more training data in parallel, which allows for a larger learning rate (within the limits of the model and dataset)." + ] + }, { "cell_type": "markdown", "metadata": { @@ -329,13 +340,16 @@ "id": "YOXO5nvvK3US" }, "source": [ - "Define the following `tf.keras.callbacks`:\n", + "Define the following [Keras Callbacks](https://www.tensorflow.org/guide/keras/train_and_evaluate):\n", "\n", "- `tf.keras.callbacks.TensorBoard`: writes a log for TensorBoard, which allows you to visualize the graphs.\n", "- `tf.keras.callbacks.ModelCheckpoint`: saves the model at a certain frequency, such as after every epoch.\n", + "- `tf.keras.callbacks.BackupAndRestore`: provides the fault tolerance functionality by backing up the model and current epoch number. Learn more in the _Fault tolerance_ section of the [Multi-worker training with Keras](multi_worker_with_keras.ipynb) tutorial.\n", "- `tf.keras.callbacks.LearningRateScheduler`: schedules the learning rate to change after, for example, every epoch/batch.\n", "\n", - "For illustrative purposes, add a custom callback called `PrintLR` to display the *learning rate* in the notebook." + "For illustrative purposes, add a [custom callback](https://www.tensorflow.org/guide/keras/custom_callback) called `PrintLR` to display the *learning rate* in the notebook.\n", + "\n", + "**Note:** Use the `BackupAndRestore` callback instead of `ModelCheckpoint` as the main mechanism to restore the training state upon a restart from a job failure. Since `BackupAndRestore` only supports eager mode, in graph mode consider using `ModelCheckpoint`." ] }, { @@ -349,7 +363,7 @@ "# Define the checkpoint directory to store the checkpoints.\n", "checkpoint_dir = './training_checkpoints'\n", "# Define the name of the checkpoint files.\n", - "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch}\")" + "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch:04d}.weights.h5\")" ] }, { @@ -382,8 +396,7 @@ "# Define a callback for printing the learning rate at the end of each epoch.\n", "class PrintLR(tf.keras.callbacks.Callback):\n", " def on_epoch_end(self, epoch, logs=None):\n", - " print('\\nLearning rate for epoch {} is {}'.format(epoch + 1,\n", - " model.optimizer.lr.numpy()))" + " print('\\nLearning rate for epoch {} is {}'.format(epoch + 1, model.optimizer.learning_rate.numpy()))" ] }, { @@ -419,7 +432,7 @@ "id": "6EophnOAB3YD" }, "source": [ - "Now, train the model in the usual way by calling `Model.fit` on the model and passing in the dataset created at the beginning of the tutorial. 
This step is the same whether you are distributing the training or not." + "Now, train the model in the usual way by calling Keras `Model.fit` on the model and passing in the dataset created at the beginning of the tutorial. This step is the same whether you are distributing the training or not." ] }, { @@ -473,7 +486,10 @@ }, "outputs": [], "source": [ - "model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))\n", + "import pathlib\n", + "latest_checkpoint = sorted(pathlib.Path(checkpoint_dir).glob('*'))[-1]\n", + "\n", + "model.load_weights(latest_checkpoint)\n", "\n", "eval_loss, eval_acc = model.evaluate(eval_dataset)\n", "\n", @@ -526,7 +542,7 @@ "id": "kBLlogrDvMgg" }, "source": [ - "## Export to SavedModel" + "## Save the model" ] }, { @@ -535,7 +551,7 @@ "id": "Xa87y_A0vRma" }, "source": [ - "Export the graph and the variables to the platform-agnostic SavedModel format using `Model.save`. After your model is saved, you can load it with or without the `Strategy.scope`." + "Save the model to a `.keras` zip archive using `Model.save`. After your model is saved, you can load it with or without the `Strategy.scope`." ] }, { @@ -546,7 +562,7 @@ }, "outputs": [], "source": [ - "path = 'saved_model/'" + "path = 'my_model.keras'" ] }, { @@ -557,7 +573,7 @@ }, "outputs": [], "source": [ - "model.save(path, save_format='tf')" + "model.save(path)" ] }, { @@ -626,7 +642,7 @@ "\n", "More examples that use different distribution strategies with the Keras `Model.fit` API:\n", "\n", - "1. The [Solve GLUE tasks using BERT on TPU](https://www.tensorflow.org/text/tutorials/bert_glue) tutorial uses `tf.distribute.MirroredStrategy` for training on GPUs and `tf.distribute.TPUStrategy`—on TPUs.\n", + "1. The [Solve GLUE tasks using BERT on TPU](https://www.tensorflow.org/text/tutorials/bert_glue) tutorial uses `tf.distribute.MirroredStrategy` for training on GPUs and `tf.distribute.TPUStrategy` on TPUs.\n", "1. The [Save and load a model using a distribution strategy](save_and_load.ipynb) tutorial demonstates how to use the SavedModel APIs with `tf.distribute.Strategy`.\n", "1. The [official TensorFlow models](https://github.com/tensorflow/models/tree/master/official) can be configured to run multiple distribution strategies.\n", "\n", diff --git a/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb b/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb index ef3b4a73201..0361eea9328 100644 --- a/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb @@ -63,11 +63,9 @@ "source": [ "## Overview\n", "\n", - "This tutorial demonstrates multi-worker training with custom training loop API, distributed via MultiWorkerMirroredStrategy, so a Keras model designed to run on [single-worker](https://www.tensorflow.org/tutorials/distribute/custom_training) can seamlessly work on multiple workers with minimal code change.\n", + "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and with [custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) using the `tf.distribute.Strategy` API. The training loop is distributed via `tf.distribute.MultiWorkerMirroredStrategy`, such that a `tf.keras` model—designed to run on [single-worker](custom_training.ipynb)—can seamlessly work on multiple workers with minimal code changes. Custom training loops provide flexibility and a greater control on training, while also making it easier to debug the model. 
Learn more about [writing a basic training loop](../../guide/basic_training_loops.ipynb), [writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [custom training](../customization/custom_training_walkthrough.ipynb).\n", "\n", - "We are using custom training loops to train our model because they give us flexibility and a greater control on training. Moreover, it is easier to debug the model and the training loop. More detailed information is available in [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).\n", - "\n", - "If you are looking for how to use `MultiWorkerMirroredStrategy` with keras `model.fit`, refer to this [tutorial](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) instead.\n", + "If you are looking for how to use `MultiWorkerMirroredStrategy` with `tf.keras.Model.fit`, refer to this [tutorial](multi_worker_with_keras.ipynb) instead.\n", "\n", "[Distributed Training in TensorFlow](../../guide/distributed_training.ipynb) guide is available for an overview of the distribution strategies TensorFlow supports for those interested in a deeper understanding of `tf.distribute.Strategy` APIs." ] @@ -102,9 +100,8 @@ "id": "Zz0EY91y3mxy" }, "source": [ - "Before importing TensorFlow, make a few changes to the environment.\n", - "\n", - "Disable all GPUs. This prevents errors caused by the workers all trying to use the same GPU. For a real application each worker would be on a different machine." + "Before importing TensorFlow, make a few changes to the environment:\n", + "* Disable all GPUs. This prevents errors caused by all workers trying to use the same GPU. In a real-world application, each worker would be on a different machine." ] }, { @@ -124,7 +121,7 @@ "id": "7X1MS6385BWi" }, "source": [ - "Reset the `TF_CONFIG` environment variable, you'll see more about this later." + "* Reset the `'TF_CONFIG'` environment variable (you'll see more about this later)." ] }, { @@ -144,7 +141,7 @@ "id": "Rd4L9Ii77SS8" }, "source": [ - "Be sure that the current directory is on python's path. This allows the notebook to import the files written by `%%writefile` later.\n" + "* Make sure that the current directory is on Python's path. This allows the notebook to import the files written by `%%writefile` later.\n" ] }, { @@ -194,7 +191,7 @@ "id": "fLW6D2TzvC-4" }, "source": [ - "Next create an `mnist.py` file with a simple model and dataset setup. This python file will be used by the worker-processes in this tutorial:" + "Next, create an `mnist.py` file with a simple model and dataset setup. 
This Python file will be used by the worker-processes in this tutorial:" ] }, { @@ -230,13 +227,18 @@ " return dataset\n", "\n", "def build_cnn_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " return tf.keras.Sequential([\n", " tf.keras.Input(shape=(28, 28)),\n", " tf.keras.layers.Reshape(target_shape=(28, 28, 1)),\n", - " tf.keras.layers.Conv2D(32, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(32, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(128, activation='relu'),\n", - " tf.keras.layers.Dense(10)\n", + " tf.keras.layers.Dense(128,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10, kernel_regularizer=regularizer)\n", " ])" ] }, @@ -246,9 +248,9 @@ "id": "JmgZwwymxqt5" }, "source": [ - "## Multi-worker Configuration\n", + "## Multi-worker configuration\n", "\n", - "Now let's enter the world of multi-worker training. In TensorFlow, the `TF_CONFIG` environment variable is required for training on multiple machines, each of which possibly has a different role. `TF_CONFIG` used below, is a JSON string used to specify the cluster configuration on each worker that is part of the cluster. This is the default method for specifying a cluster, using `cluster_resolver.TFConfigClusterResolver`, but there are other options available in the `distribute.cluster_resolver` module." + "Now let's enter the world of multi-worker training. In TensorFlow, the `'TF_CONFIG'` environment variable is required for training on multiple machines. Each machine may have a different role. The `'TF_CONFIG'` variable used below is a JSON string that specifies the cluster configuration on each worker that is part of the cluster. This is the default method for specifying a cluster, using `cluster_resolver.TFConfigClusterResolver`, but there are other options available in the `distribute.cluster_resolver` module. Learn more about setting up the `'TF_CONFIG'` variable in the [Distributed training guide](../../guide/distributed_training.ipynb)." ] }, { @@ -283,7 +285,7 @@ "id": "JjgwJbPKZkJL" }, "source": [ - "Here is the same `TF_CONFIG` serialized as a JSON string:" + "Note that `tf_config` is just a local variable in Python. To use it for training configuration, serialize it as a JSON and place it in a `'TF_CONFIG'` environment variable. Here is the same `'TF_CONFIG'` serialized as a JSON string:" ] }, { @@ -303,11 +305,11 @@ "id": "AUBmYRZqxthH" }, "source": [ - "There are two components of `TF_CONFIG`: `cluster` and `task`.\n", + "There are two components of `'TF_CONFIG'`: `'cluster'` and `'task'`.\n", "\n", - "* `cluster` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs such as `worker`. In multi-worker training with `MultiWorkerMirroredStrategy`, there is usually one `worker` that takes on a little more responsibility like saving checkpoint and writing summary file for TensorBoard in addition to what a regular `worker` does. Such a worker is referred to as the `chief` worker, and it is customary that the `worker` with `index` 0 is appointed as the chief `worker` (in fact this is how `tf.distribute.Strategy` is implemented).\n", + "* `'cluster'` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs such as `'worker'`. 
In multi-worker training with `MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on a little more responsibility like saving checkpoints and writing summary files for TensorBoard in addition to what a regular `'worker'` does. Such a worker is referred to as the `'chief'` worker, and it is customary that the `'worker'` with `'index'` 0 is appointed as the chief `worker`.\n", "\n", - "* `task` provides information of the current task and is different on each worker. It specifies the `type` and `index` of that worker." + "* `'task'` provides information of the current task and is different on each worker. It specifies the `'type'` and `'index'` of that worker." ] }, { @@ -316,7 +318,7 @@ "id": "8YFpxrcsZ2xG" }, "source": [ - "In this example, you set the task `type` to `\"worker\"` and the task `index` to `0`. This machine is the first worker and will be appointed as the chief worker and do more work than the others. Note that other machines will need to have the `TF_CONFIG` environment variable set as well, and it should have the same `cluster` dict, but different task `type` or task `index` depending on what the roles of those machines are.\n" + "In this example, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. This machine is the first worker and will be appointed as the chief worker and do more work than the others. Note that other machines will need to have the `'TF_CONFIG'` environment variable set as well, and it should have the same `'cluster'` dict, but different task `'type'` or task `'index'` depending on what the roles of those machines are.\n" ] }, { @@ -325,18 +327,9 @@ "id": "aogb74kHxynz" }, "source": [ - "For illustration purposes, this tutorial shows how one may set a `TF_CONFIG` with 2 workers on `localhost`. In practice, users would create multiple workers on external IP addresses/ports, and set `TF_CONFIG` on each worker appropriately.\n", + "For illustration purposes, this tutorial shows how one may set a `'TF_CONFIG'` with two workers on `'localhost'`. In practice, users would create multiple workers on external IP addresses/ports, and set `'TF_CONFIG'` on each worker appropriately.\n", "\n", - "In this example you will use 2 workers, the first worker's `TF_CONFIG` is shown above. For the second worker you would set `tf_config['task']['index']=1`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f83FVYqDX3aX" - }, - "source": [ - "Above, `tf_config` is just a local variable in python. To actually use it to configure training, this dictionary needs to be serialized as JSON, and placed in the `TF_CONFIG` environment variable." + "This example uses two workers. The first worker's `'TF_CONFIG'` is shown above. For the second worker, set `tf_config['task']['index']=1`." ] }, { @@ -354,7 +347,7 @@ "id": "FcjAbuGY1ACJ" }, "source": [ - "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this `jupyter notebook` process:" + "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this Jupyter Notebook process:" ] }, { @@ -374,7 +367,7 @@ "id": "gQkIX-cg18md" }, "source": [ - "You can access the environment variable from a subprocesses:" + "you can then access the environment variable from a subprocess:" ] }, { @@ -395,7 +388,7 @@ "id": "af6BCA-Y2fpz" }, "source": [ - "In the next section, you'll use this to pass the `TF_CONFIG` to the worker subprocesses. 
You would never really launch your jobs this way, but it's sufficient for the purposes of this tutorial: To demonstrate a minimal multi-worker example." + "In the next section, you'll use this to pass the `'TF_CONFIG'` to the worker subprocesses. You would never really launch your jobs this way, but it's sufficient for the purposes of this tutorial: To demonstrate a minimal multi-worker example." ] }, { @@ -406,7 +399,7 @@ "source": [ "## MultiWorkerMirroredStrategy\n", "\n", - "To train the model, use an instance of `tf.distribute.MultiWorkerMirroredStrategy`, which creates copies of all variables in the model's layers on each device across all workers. The [`tf.distribute.Strategy` guide](../../guide/distributed_training.ipynb) has more details about this strategy." + "Before training the model, first create an instance of `tf.distribute.MultiWorkerMirroredStrategy`:" ] }, { @@ -426,7 +419,7 @@ "id": "N0iv7SyyAohc" }, "source": [ - "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy()` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created." + "Note: `'TF_CONFIG'` is parsed and TensorFlow's GRPC servers are started at the time you call `tf.distribute.MultiWorkerMirroredStrategy.` Therefore, you must set the `'TF_CONFIG'` environment variable before you instantiate a `tf.distribute.Strategy`. To save time in this illustrative example, this is not demonstrated in this tutorial, so that servers do not need to start. You can find a full example in the last section of this tutorial." ] }, { @@ -435,7 +428,7 @@ "id": "TS4S-faBHHam" }, "source": [ - "Use `tf.distribute.Strategy.scope` to specify that a strategy should be used when building your model. This puts you in the \"[cross-replica context](https://www.tensorflow.org/guide/distributed_training?hl=en#mirroredstrategy)\" for this strategy, which means the strategy is put in control of things like variable placement." + "Use `tf.distribute.Strategy.scope` to specify that a strategy should be used when building your model. This allows the strategy to control things like variable placement—it will create copies of all variables in the model's layers on each device across all workers." ] }, { @@ -459,9 +452,8 @@ }, "source": [ "## Auto-shard your data across workers\n", - "In multi-worker training, dataset sharding is not necessarily needed, however it gives you exactly once semantic which makes more training more reproducible, i.e. training on multiple workers should be the same as training on one worker. Note: performance can be affected in some cases.\n", "\n", - "See: [`distribute_datasets_from_function`](https://www.tensorflow.org/api_docs/python/tf/distribute/Strategy?version=nightly#distribute_datasets_from_function)" + "In multi-worker training, _dataset sharding_ is needed to ensure convergence and reproducibility. Sharding means handing each worker a subset of the entire dataset—it helps create the experience similar to training on a single worker. In the example below, you're relying on the default autosharding policy of `tf.distribute`. You can also customize it by setting the `tf.data.experimental.AutoShardPolicy` of the `tf.data.experimental.DistributeOptions`. To learn more, refer to the _Sharding_ section of the [Distributed input tutorial](input.ipynb)." 
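+ "\n",
+ "For example, a minimal sketch of overriding the default policy (assuming you want `DATA` sharding instead of the default, and that `dataset` is the `tf.data.Dataset` you are about to distribute) could look like:\n",
+ "\n",
+ "```\n",
+ "options = tf.data.Options()\n",
+ "options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA\n",
+ "dataset = dataset.with_options(options)\n",
+ "```"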
] }, { @@ -487,8 +479,8 @@ "id": "rkNzSR3g60iP" }, "source": [ - "## Define Custom Training Loop and Train the model\n", - "Specify an optimizer" + "## Define a custom training loop and train the model\n", + "Specify an optimizer:" ] }, { @@ -500,7 +492,7 @@ "outputs": [], "source": [ "with strategy.scope():\n", - " # The creation of optimizer and train_accuracy will need to be in\n", + " # The creation of optimizer and train_accuracy needs to be in\n", " # `strategy.scope()` as well, since they create variables.\n", " optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)\n", " train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", @@ -513,7 +505,7 @@ "id": "RmrDcAii4B5O" }, "source": [ - "Define a training step with `tf.function`\n" + "Define a training step with `tf.function`:\n" ] }, { @@ -533,11 +525,13 @@ " x, y = inputs\n", " with tf.GradientTape() as tape:\n", " predictions = multi_worker_model(x, training=True)\n", - " per_batch_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", + " per_example_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)(y, predictions)\n", - " loss = tf.nn.compute_average_loss(\n", - " per_batch_loss, global_batch_size=global_batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = multi_worker_model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " grads = tape.gradient(loss, multi_worker_model.trainable_variables)\n", " optimizer.apply_gradients(\n", @@ -558,7 +552,7 @@ "source": [ "### Checkpoint saving and restoring\n", "\n", - "Checkpointing implementation in a Custom Training Loop requires the user to handle it instead of using a keras callback. It allows you to save model's weights and restore them without having to save the whole model." + "As you write a custom training loop, you need to handle [checkpoint saving](../../guide/checkpoint.ipynb) manually instead of relying on a Keras callback. Note that for `MultiWorkerMirroredStrategy`, saving a checkpoint or a complete model requires the participation of all workers, because attempting to save only on the chief worker could lead to a deadlock. Workers also need to write to different paths to avoid overwriting each other. 
Here's an example of how to configure the directories:" ] }, { @@ -572,40 +566,34 @@ "from multiprocessing import util\n", "checkpoint_dir = os.path.join(util.get_temp_dir(), 'ckpt')\n", "\n", - "def _is_chief(task_type, task_id):\n", - " return task_type is None or task_type == 'chief' or (task_type == 'worker' and\n", - " task_id == 0)\n", + "def _is_chief(task_type, task_id, cluster_spec):\n", + " return (task_type is None\n", + " or task_type == 'chief'\n", + " or (task_type == 'worker'\n", + " and task_id == 0\n", + " and \"chief\" not in cluster_spec.as_dict()))\n", + "\n", "def _get_temp_dir(dirpath, task_id):\n", " base_dirpath = 'workertemp_' + str(task_id)\n", " temp_dir = os.path.join(dirpath, base_dirpath)\n", " tf.io.gfile.makedirs(temp_dir)\n", " return temp_dir\n", "\n", - "def write_filepath(filepath, task_type, task_id):\n", + "def write_filepath(filepath, task_type, task_id, cluster_spec):\n", " dirpath = os.path.dirname(filepath)\n", " base = os.path.basename(filepath)\n", - " if not _is_chief(task_type, task_id):\n", + " if not _is_chief(task_type, task_id, cluster_spec):\n", " dirpath = _get_temp_dir(dirpath, task_id)\n", " return os.path.join(dirpath, base)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "P7fabUIEW7-M" - }, - "source": [ - "Note: Checkpointing and Saving need to happen on each worker and they need to write to different paths as they would override each others.\n", - "If you chose to only checkpoint/save on the chief, this can lead to deadlock and is not recommended." - ] - }, { "cell_type": "markdown", "metadata": { "id": "nrcdPHtG4ObO" }, "source": [ - " Here, you'll create one `tf.train.Checkpoint` that tracks the model, which is managed by a `tf.train.CheckpointManager` so that only the latest checkpoint is preserved." + "Create one `tf.train.Checkpoint` that tracks the model, which is managed by a `tf.train.CheckpointManager`, so that only the latest checkpoints are preserved:" ] }, { @@ -623,11 +611,16 @@ " name='step_in_epoch')\n", "task_type, task_id = (strategy.cluster_resolver.task_type,\n", " strategy.cluster_resolver.task_id)\n", + "# Normally, you don't need to manually instantiate a `ClusterSpec`, but in this\n", + "# illustrative example you did not set `'TF_CONFIG'` before initializing the\n", + "# strategy. Check out the next section for \"real-world\" usage.\n", + "cluster_spec = tf.train.ClusterSpec(tf_config['cluster'])\n", "\n", "checkpoint = tf.train.Checkpoint(\n", " model=multi_worker_model, epoch=epoch, step_in_epoch=step_in_epoch)\n", "\n", - "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id)\n", + "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id,\n", + " cluster_spec)\n", "checkpoint_manager = tf.train.CheckpointManager(\n", " checkpoint, directory=write_checkpoint_dir, max_to_keep=1)" ] @@ -638,7 +631,7 @@ "id": "RO7cbN40XD5v" }, "source": [ - "Now, when you need to restore, you can find the latest checkpoint saved using the convenient `tf.train.latest_checkpoint` function." + "Now, when you need to restore a checkpoint, you can find the latest checkpoint saved using the convenient `tf.train.latest_checkpoint` function (or by calling `tf.train.CheckpointManager.restore_or_initialize`)." 
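+ "\n",
+ "For example, a minimal sketch (reusing the `checkpoint` and `checkpoint_dir` defined above) might look like:\n",
+ "\n",
+ "```\n",
+ "# Restore the latest checkpoint if one exists; otherwise start from scratch.\n",
+ "latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)\n",
+ "if latest_checkpoint:\n",
+ "  checkpoint.restore(latest_checkpoint)\n",
+ "```"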
] }, { @@ -693,7 +686,7 @@ "    # Once the `CheckpointManager` is set up, you're now ready to save, and remove\n", "    # the checkpoints non-chief workers saved.\n", "    checkpoint_manager.save()\n", -    "    if not _is_chief(task_type, task_id):\n", +    "    if not _is_chief(task_type, task_id, cluster_spec):\n", "      tf.io.gfile.rmtree(write_checkpoint_dir)\n", "\n", "    epoch.assign_add(1)\n", @@ -706,7 +699,7 @@ "id": "0W1Osks466DE" }, "source": [ -    "## Full code setup on workers" +    "## Complete code at a glance" ] }, { @@ -715,10 +708,11 @@ "id": "jfYpmIxO6Jck" }, "source": [ -    "To actually run with `MultiWorkerMirroredStrategy` you'll need to run worker processes and pass a `TF_CONFIG` to them.\n", +    "To sum up all the procedures discussed so far:\n", "\n", -    "Like the `mnist.py` file written earlier, here is the `main.py` that \n", -    "contain the same code we walked through step by step previously in this colab, we're just writing it to a file so each of the workers will run it:" +    "1. Create the worker processes.\n", +    "2. Pass `'TF_CONFIG'`s to the worker processes.\n", +    "3. Let each worker process run the script below that contains the training code." ] }, { @@ -746,19 +740,23 @@ "num_steps_per_epoch=70\n", "\n", "# Checkpoint saving and restoring\n", -    "def _is_chief(task_type, task_id):\n", -    "  return task_type is None or task_type == 'chief' or (task_type == 'worker' and\n", -    "                                                       task_id == 0)\n", +    "def _is_chief(task_type, task_id, cluster_spec):\n", +    "  return (task_type is None\n", +    "          or task_type == 'chief'\n", +    "          or (task_type == 'worker'\n", +    "              and task_id == 0\n", +    "              and 'chief' not in cluster_spec.as_dict()))\n", +    "\n", "def _get_temp_dir(dirpath, task_id):\n", "  base_dirpath = 'workertemp_' + str(task_id)\n", "  temp_dir = os.path.join(dirpath, base_dirpath)\n", "  tf.io.gfile.makedirs(temp_dir)\n", "  return temp_dir\n", "\n", -    "def write_filepath(filepath, task_type, task_id):\n", +    "def write_filepath(filepath, task_type, task_id, cluster_spec):\n", "  dirpath = os.path.dirname(filepath)\n", "  base = os.path.basename(filepath)\n", -    "  if not _is_chief(task_type, task_id):\n", +    "  if not _is_chief(task_type, task_id, cluster_spec):\n", "    dirpath = _get_temp_dir(dirpath, task_id)\n", "  return os.path.join(dirpath, base)\n", "\n", @@ -768,11 +766,11 @@ "strategy = tf.distribute.MultiWorkerMirroredStrategy()\n", "\n", "with strategy.scope():\n", -    "  # Model building/compiling need to be within `strategy.scope()`.\n", +    "  # Model building/compiling need to be within `tf.distribute.Strategy.scope`.\n", "  multi_worker_model = mnist.build_cnn_model()\n", "\n", "  multi_worker_dataset = strategy.distribute_datasets_from_function(\n", -    "      lambda input_context: mnist.dataset_fn(global_batch_size, input_context)) \n", +    "      lambda input_context: mnist.dataset_fn(global_batch_size, input_context))\n", "  optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)\n", "  train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", "      name='train_accuracy')\n", @@ -786,11 +784,13 @@ "    x, y = inputs\n", "    with tf.GradientTape() as tape:\n", "      predictions = multi_worker_model(x, training=True)\n", -    "      per_batch_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", +    "      per_example_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", "          from_logits=True,\n", "          reduction=tf.keras.losses.Reduction.NONE)(y, predictions)\n", -    "      loss = tf.nn.compute_average_loss(\n", -    "          per_batch_loss, global_batch_size=global_batch_size)\n", +    "      loss = tf.nn.compute_average_loss(per_example_loss)\n", +    "      
model_losses = multi_worker_model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " grads = tape.gradient(loss, multi_worker_model.trainable_variables)\n", " optimizer.apply_gradients(\n", @@ -809,13 +809,15 @@ " initial_value=tf.constant(0, dtype=tf.dtypes.int64),\n", " name='step_in_epoch')\n", "\n", - "task_type, task_id = (strategy.cluster_resolver.task_type,\n", - " strategy.cluster_resolver.task_id)\n", + "task_type, task_id, cluster_spec = (strategy.cluster_resolver.task_type,\n", + " strategy.cluster_resolver.task_id,\n", + " strategy.cluster_resolver.cluster_spec())\n", "\n", "checkpoint = tf.train.Checkpoint(\n", " model=multi_worker_model, epoch=epoch, step_in_epoch=step_in_epoch)\n", "\n", - "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id)\n", + "write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id,\n", + " cluster_spec)\n", "checkpoint_manager = tf.train.CheckpointManager(\n", " checkpoint, directory=write_checkpoint_dir, max_to_keep=1)\n", "\n", @@ -838,11 +840,11 @@ " train_loss = total_loss / num_batches\n", " print('Epoch: %d, accuracy: %f, train_loss: %f.'\n", " %(epoch.numpy(), train_accuracy.result(), train_loss))\n", - " \n", + "\n", " train_accuracy.reset_states()\n", "\n", " checkpoint_manager.save()\n", - " if not _is_chief(task_type, task_id):\n", + " if not _is_chief(task_type, task_id, cluster_spec):\n", " tf.io.gfile.rmtree(write_checkpoint_dir)\n", "\n", " epoch.assign_add(1)\n", @@ -855,7 +857,6 @@ "id": "ItVOvPN1qnZ6" }, "source": [ - "## Train and Evaluate\n", "The current directory now contains both Python files:" ] }, @@ -877,7 +878,7 @@ "id": "qmEEStPS6vR_" }, "source": [ - "So json-serialize the `TF_CONFIG` and add it to the environment variables:" + "So JSON-serialize the `'TF_CONFIG'` and add it to the environment variables:" ] }, { @@ -897,7 +898,7 @@ "id": "MsY3dQLK7jdf" }, "source": [ - "Now, you can launch a worker process that will run the `main.py` and use the `TF_CONFIG`:" + "Now, you can launch a worker process that will run the `main.py` and use the `'TF_CONFIG'`:" ] }, { @@ -935,9 +936,9 @@ "1. It uses the `%%bash` which is a [notebook \"magic\"](https://ipython.readthedocs.io/en/stable/interactive/magics.html) to run some bash commands.\n", "2. It uses the `--bg` flag to run the `bash` process in the background, because this worker will not terminate. It waits for all the workers before it starts.\n", "\n", - "The backgrounded worker process won't print output to this notebook, so the `&>` redirects its output to a file, so you can see what happened.\n", + "The backgrounded worker process won't print the output to this notebook. 
The `&>` redirects its output to a file, so that you can inspect what happened.\n", "\n", -    "So, wait a few seconds for the process to start up:" +    "Wait a few seconds for the process to start up:" ] }, { @@ -958,7 +959,7 @@ "id": "ZFPoNxg_9_Mx" }, "source": [ -    "Now look what's been output to the worker's logfile so far:" +    "Now, check what has been output to the worker's log file so far:" ] }, { @@ -988,7 +989,7 @@ "id": "Pi8vPNNA_l4a" }, "source": [ -    "So update the `tf_config` for the second worker's process to pick up:" +    "Update the `tf_config` for the second worker's process to pick up:" ] }, { @@ -1030,7 +1031,7 @@ "id": "hX4FA2O2AuAn" }, "source": [ -    "Now if you recheck the logs written by the first worker you'll see that it participated in training that model:" +    "If you recheck the logs written by the first worker, notice that it participated in training that model:" ] }, { @@ -1053,7 +1054,7 @@ }, "outputs": [], "source": [ -    "# Delete the `TF_CONFIG`, and kill any background tasks so they don't affect the next section.\n", +    "# Delete the `'TF_CONFIG'`, and kill any background tasks so they don't affect the next section.\n", "os.environ.pop('TF_CONFIG', None)\n", "%killbgscripts" ] }, { @@ -1064,9 +1065,9 @@ "id": "bhxMXa0AaZkK" }, "source": [ -    "## Multi worker training in depth\n", +    "## Multi-worker training in depth\n", "\n", -    "This tutorial has demonstrated a `Custom Training Loop` workflow of the multi-worker setup. A detailed description of other topics is available in the [`model.fit's guide`](https://colab.sandbox.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/distribute/multi_worker_with_keras.ipynb) of the multi-worker setup and applicable to CTLs." +    "This tutorial has demonstrated a custom training loop workflow of the multi-worker setup. Detailed descriptions of other topics are available in the [Multi-worker training with Keras (`tf.keras.Model.fit`)](multi_worker_with_keras.ipynb) tutorial, and they also apply to custom training loops." ] }, { @@ -1075,10 +1076,11 @@ "id": "ega2hdOQEmy_" }, "source": [ -    "## See also\n", -    "1. [Distributed Training in TensorFlow](https://www.tensorflow.org/guide/distributed_training) guide provides an overview of the available distribution strategies.\n", +    "## Learn more\n", +    "\n", +    "1. The [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide provides an overview of the available distribution strategies.\n", "2. [Official models](https://github.com/tensorflow/models/tree/master/official), many of which can be configured to run multiple distribution strategies.\n", "3. 
The [Performance section](../../guide/function.ipynb) in the `tf.function` guide provides information about other strategies and [tools](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models.\n" ] } ], @@ -1086,7 +1088,7 @@ "colab": { "collapsed_sections": [], "name": "multi_worker_with_ctl.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb b/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb index b4fffa60fb4..fcee0618854 100644 --- a/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb @@ -186,7 +186,7 @@ "\n", "There are two components of `TF_CONFIG`: `cluster` and `task`. `cluster` provides information about the entire cluster, namely the workers and parameter servers in the cluster. `task` provides information about the current task. The first component `cluster` is the same for all workers and parameter servers in the cluster, and the second component `task` is different on each worker and parameter server and specifies its own `type` and `index`. In this example, the task `type` is `worker` and the task `index` is `0`.\n", "\n", - "For illustration purposes, this tutorial shows how to set a `TF_CONFIG` with 2 workers on `localhost`. In practice, you would create multiple workers on an external IP address and port, and set `TF_CONFIG` on each worker appropriately, i.e. modify the task `index`.\n", + "For illustration purposes, this tutorial shows how to set a `TF_CONFIG` with 2 workers on `localhost`. In practice, you would create multiple workers on an external IP address and port, and set `TF_CONFIG` on each worker appropriately, i.e., modify the task `index`.\n", "\n", "Warning: *Do not execute the following code in Colab.* TensorFlow's runtime will attempt to create a gRPC server at the specified IP address and port, which will likely fail. See the [keras version](multi_worker_with_keras.ipynb) of this tutorial for an example of how you can test run multiple workers on a single machine.\n", "\n", @@ -351,8 +351,7 @@ "Tce3stUlHN0L" ], "name": "multi_worker_with_estimator.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/multi_worker_with_keras.ipynb b/site/en/tutorials/distribute/multi_worker_with_keras.ipynb index 1f00bb99e5b..c972e8b7fb6 100644 --- a/site/en/tutorials/distribute/multi_worker_with_keras.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_keras.ipynb @@ -63,13 +63,36 @@ "source": [ "## Overview\n", "\n", - "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and the `Model.fit` API using the `tf.distribute.Strategy` API—specifically the `tf.distribute.MultiWorkerMirroredStrategy` class. 
With the help of this strategy, a Keras model that was designed to run on a single-worker can seamlessly work on multiple workers with minimal code changes.\n", -    "\n", -    "For those interested in a deeper understanding of `tf.distribute.Strategy` APIs, the [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide is available for an overview of the distribution strategies TensorFlow supports.\n", +    "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and the `Model.fit` API using the `tf.distribute.MultiWorkerMirroredStrategy` API. With the help of this strategy, a Keras model that was designed to run on a single-worker can seamlessly work on multiple workers with minimal code changes.\n", "\n", "To learn how to use the `MultiWorkerMirroredStrategy` with Keras and a custom training loop, refer to [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb).\n", "\n", -    "Note that the purpose of this tutorial is to demonstrate a minimal multi-worker example with two workers." +    "This tutorial contains a minimal multi-worker example with two workers for demonstration purposes." ] }, { "cell_type": "markdown", "metadata": { "id": "JUdRerXg6yz3" }, "source": [ "### Choose the right strategy" ] }, { "cell_type": "markdown", "metadata": { "id": "YAiCV_oL63GM" }, "source": [ "Before you dive in, make sure that `tf.distribute.MultiWorkerMirroredStrategy` is the right choice for your accelerator(s) and training. These are two common ways of distributing training with data parallelism:\n", "\n", "* _Synchronous training_, where the steps of training are synced across the workers and replicas, such as `tf.distribute.MirroredStrategy`, `tf.distribute.TPUStrategy`, and `tf.distribute.MultiWorkerMirroredStrategy`. All workers train over different slices of input data in sync, aggregating gradients at each step.\n", "* _Asynchronous training_, where the training steps are not strictly synced, such as `tf.distribute.experimental.ParameterServerStrategy`. All workers are independently training over the input data and updating variables asynchronously.\n", "\n", "If you are looking for multi-worker synchronous training without TPUs, then `tf.distribute.MultiWorkerMirroredStrategy` is your choice. It creates copies of all variables in the model's layers on each device across all workers. It uses `CollectiveOps`, a TensorFlow op for collective communication, to aggregate gradients and keep the variables in sync. For those interested, check out the `tf.distribute.experimental.CommunicationOptions` parameter for the collective implementation options.\n", "\n", "For an overview of `tf.distribute.Strategy` APIs, refer to [Distributed training in TensorFlow](../../guide/distributed_training.ipynb)." ] }, { @@ -104,14 +127,14 @@ "source": [ "Before importing TensorFlow, make a few changes to the environment:\n", "\n", -    "1. Disable all GPUs. This prevents errors caused by the workers all trying to use the same GPU. In a real-world application, each worker would be on a different machine." +    "* In a real-world application, each worker would be on a different machine. For the purposes of this tutorial, all the workers will run on **this** machine. Therefore, disable all GPUs to prevent errors caused by all workers trying to use the same GPU." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "685pbYEY3jGC" + "id": "rpEIVI5upIzM" }, "outputs": [], "source": [ @@ -124,7 +147,7 @@ "id": "7X1MS6385BWi" }, "source": [ - "2. Reset the `TF_CONFIG` environment variable (you'll learn more about this later):" + "* Reset the `TF_CONFIG` environment variable (you'll learn more about this later):" ] }, { @@ -144,7 +167,7 @@ "id": "Rd4L9Ii77SS8" }, "source": [ - "3. Make sure that the current directory is on Python's path—this allows the notebook to import the files written by `%%writefile` later:\n" + "* Make sure that the current directory is on Python's path—this allows the notebook to import the files written by `%%writefile` later:\n" ] }, { @@ -162,10 +185,30 @@ { "cell_type": "markdown", "metadata": { - "id": "pDhHuMjb7bfU" + "id": "9hLpDZhAz2q-" }, "source": [ - "Now import TensorFlow:" + "Install `tf-nightly`, as the frequency of checkpoint saving at a particular step with the `save_freq` argument in `tf.keras.callbacks.BackupAndRestore` is introduced from TensorFlow 2.10:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-XqozLfzz30N" + }, + "outputs": [], + "source": [ + "!pip install tf-nightly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "524e38dab658" + }, + "source": [ + "Finally, import TensorFlow:" ] }, { @@ -194,7 +237,7 @@ "id": "fLW6D2TzvC-4" }, "source": [ - "Next, create an `mnist.py` file with a simple model and dataset setup. This Python file will be used by the worker-processes in this tutorial:" + "Next, create an `mnist_setup.py` file with a simple model and dataset setup. This Python file will be used by the worker processes in this tutorial:" ] }, { @@ -205,7 +248,7 @@ }, "outputs": [], "source": [ - "%%writefile mnist.py\n", + "%%writefile mnist_setup.py\n", "\n", "import os\n", "import tensorflow as tf\n", @@ -256,11 +299,11 @@ }, "outputs": [], "source": [ - "import mnist\n", + "import mnist_setup\n", "\n", "batch_size = 64\n", - "single_worker_dataset = mnist.mnist_dataset(batch_size)\n", - "single_worker_model = mnist.build_and_compile_cnn_model()\n", + "single_worker_dataset = mnist_setup.mnist_dataset(batch_size)\n", + "single_worker_model = mnist_setup.build_and_compile_cnn_model()\n", "single_worker_model.fit(single_worker_dataset, epochs=3, steps_per_epoch=70)" ] }, @@ -276,7 +319,7 @@ "\n", "### A cluster with jobs and tasks\n", "\n", - "In TensorFlow, distributed training involves: a `'cluster'`\n", + "In TensorFlow, distributed training involves a `'cluster'`\n", "with several jobs, and each of the jobs may have one or more `'task'`s.\n", "\n", "You will need the `TF_CONFIG` configuration environment variable for training on multiple machines, each of which possibly has a different role. `TF_CONFIG` is a JSON string used to specify the cluster configuration for each worker that is part of the cluster.\n", @@ -284,10 +327,10 @@ "There are two components of a `TF_CONFIG` variable: `'cluster'` and `'task'`.\n", "\n", "* A `'cluster'` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs, such as `'worker'` or `'chief'`.\n", - " - In multi-worker training with `tf.distribute.MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on responsibilities, such as saving a checkpoint and writing a summary file for TensorBoard, in addition to what a regular `'worker'` does. 
Such `'worker'` is referred to as the chief worker (with a job name `'chief'`).\n", - " - It is customary for the `'chief'` to have `'index'` `0` be appointed to (in fact, this is how `tf.distribute.Strategy` is implemented).\n", + " - In multi-worker training with `tf.distribute.MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on more responsibilities, such as saving a checkpoint and writing a summary file for TensorBoard, in addition to what a regular `'worker'` does. Such `'worker'` is referred to as the chief worker (with a job name `'chief'`).\n", + " - It is customary for the worker with `'index'` `0` to be the `'chief'`.\n", "\n", - "* A `'task'` provides information of the current task and is different for each worker. It specifies the `'type'` and `'index'` of that worker.\n", + "* A `'task'` provides information on the current task and is different for each worker. It specifies the `'type'` and `'index'` of that worker.\n", "\n", "Below is an example configuration:" ] @@ -314,7 +357,7 @@ "id": "JjgwJbPKZkJL" }, "source": [ - "Here is the same `TF_CONFIG` serialized as a JSON string:" + "Note that `tf_config` is just a local variable in Python. To use it for training configuration, serialize it as a JSON and place it in a `TF_CONFIG` environment variable." ] }, { @@ -328,22 +371,13 @@ "json.dumps(tf_config)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "f83FVYqDX3aX" - }, - "source": [ - "Note that`tf_config` is just a local variable in Python. To be able to use it for a training configuration, this dict needs to be serialized as a JSON and placed in a `TF_CONFIG` environment variable." - ] - }, { "cell_type": "markdown", "metadata": { "id": "8YFpxrcsZ2xG" }, "source": [ - "In the example configuration above, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. Therefore, this machine is the _first_ worker. It will be appointed as the `'chief'` worker and do more work than the others.\n", + "In the example configuration above, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. Therefore, this machine is the _first_ worker. It will be appointed as the `'chief'` worker.\n", "\n", "Note: Other machines will need to have the `TF_CONFIG` environment variable set as well, and it should have the same `'cluster'` dict, but different task `'type'`s or task `'index'`es, depending on the roles of those machines." ] @@ -354,12 +388,8 @@ "id": "aogb74kHxynz" }, "source": [ - "For illustration purposes, this tutorial shows how you may set up a `TF_CONFIG` variable with two workers on a `localhost`.\n", - "\n", - "In practice, you would create multiple workers on external IP addresses/ports and set a `TF_CONFIG` variable on each worker accordingly.\n", - "\n", - "In this tutorial, you will use two workers:\n", - "- The first (`'chief'`) worker's `TF_CONFIG` is shown above.\n", + "In practice, you would create multiple workers on external IP addresses/ports and set a `TF_CONFIG` variable on each worker accordingly. 
For illustration purposes, this tutorial shows how you may set up a `TF_CONFIG` variable with two workers on a `localhost`:\n", + "- The first (`'chief'`) worker's `TF_CONFIG` as shown above.\n", "- For the second worker, you will set `tf_config['task']['index']=1`" ] }, @@ -378,9 +408,7 @@ "id": "FcjAbuGY1ACJ" }, "source": [ - "Subprocesses inherit environment variables from their parent.\n", - "\n", - "For example, you can set an environment variable in this Jupyter Notebook process as follows:" + "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this Jupyter Notebook process:" ] }, { @@ -400,7 +428,7 @@ "id": "gQkIX-cg18md" }, "source": [ - "Then, you can access the environment variable from a subprocesses:" + "... then you can access the environment variable from the subprocesses:" ] }, { @@ -421,7 +449,16 @@ "id": "af6BCA-Y2fpz" }, "source": [ - "In the next section, you'll use a similar method to pass the `TF_CONFIG` to the worker subprocesses. In a real-world scenario, you wouldn't launch your jobs this way, but it's sufficient in this example." + "In the next section, you'll use this method to pass the `TF_CONFIG` to the worker subprocesses. You would never really launch your jobs this way in a real-world scenario—this tutorial is just showing how to do it with a minimal multi-worker example." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dnDJmaRA9qnf" + }, + "source": [ + "## Train the model" ] }, { @@ -430,16 +467,7 @@ "id": "UhNtHfuxCGVy" }, "source": [ - "## Choose the right strategy\n", - "\n", - "In TensorFlow, there are two main forms of distributed training:\n", - "\n", - "* _Synchronous training_, where the steps of training are synced across the workers and replicas, and\n", - "* _Asynchronous training_, where the training steps are not strictly synced (for example, [parameter server training](parameter_server_training.ipynb)).\n", - "\n", - "This tutorial demonstrates how to perform synchronous multi-worker training using an instance of `tf.distribute.MultiWorkerMirroredStrategy`.\n", - "\n", - "`MultiWorkerMirroredStrategy` creates copies of all variables in the model's layers on each device across all workers. It uses `CollectiveOps`, a TensorFlow op for collective communication, to aggregate gradients and keep the variables in sync. The [`tf.distribute.Strategy` guide](../../guide/distributed_training.ipynb) has more details about this strategy." + "To train the model, firstly create an instance of the `tf.distribute.MultiWorkerMirroredStrategy`:" ] }, { @@ -459,16 +487,7 @@ "id": "N0iv7SyyAohc" }, "source": [ - "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy()` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created. Since `TF_CONFIG` is not set yet, the above strategy is effectively single-worker training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FMy2VM4Akzpr" - }, - "source": [ - "`MultiWorkerMirroredStrategy` provides multiple implementations via the [`CommunicationOptions`](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/CommunicationOptions) parameter: 1) `RING` implements ring-based collectives using gRPC as the cross-host communication layer; 2) `NCCL` uses the [NVIDIA Collective Communication Library](https://developer.nvidia.com/nccl) to implement collectives; and 3) `AUTO` defers the choice to the runtime. 
The best choice of collective implementation depends upon the number and kind of GPUs, and the network interconnect in the cluster." + "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created. Since `TF_CONFIG` is not set yet, the above strategy is effectively single-worker training." ] }, { @@ -477,8 +496,6 @@ "id": "H47DDcOgfzm7" }, "source": [ - "## Train the model\n", - "\n", "With the integration of `tf.distribute.Strategy` API into `tf.keras`, the only change you will make to distribute the training to multiple-workers is enclosing the model building and `model.compile()` call inside `strategy.scope()`. The distribution strategy's scope dictates how and where the variables are created, and in the case of `MultiWorkerMirroredStrategy`, the variables created are `MirroredVariable`s, and they are replicated on each of the workers.\n" ] }, @@ -492,7 +509,7 @@ "source": [ "with strategy.scope():\n", " # Model building/compiling need to be within `strategy.scope()`.\n", - " multi_worker_model = mnist.build_and_compile_cnn_model()" + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()" ] }, { @@ -512,7 +529,7 @@ "source": [ "To actually run with `MultiWorkerMirroredStrategy` you'll need to run worker processes and pass a `TF_CONFIG` to them.\n", "\n", - "Like the `mnist.py` file written earlier, here is the `main.py` that each of the workers will run:" + "Like the `mnist_setup.py` file written earlier, here is the `main.py` that each of the workers will run:" ] }, { @@ -529,7 +546,7 @@ "import json\n", "\n", "import tensorflow as tf\n", - "import mnist\n", + "import mnist_setup\n", "\n", "per_worker_batch_size = 64\n", "tf_config = json.loads(os.environ['TF_CONFIG'])\n", @@ -538,11 +555,11 @@ "strategy = tf.distribute.MultiWorkerMirroredStrategy()\n", "\n", "global_batch_size = per_worker_batch_size * num_workers\n", - "multi_worker_dataset = mnist.mnist_dataset(global_batch_size)\n", + "multi_worker_dataset = mnist_setup.mnist_dataset(global_batch_size)\n", "\n", "with strategy.scope():\n", " # Model building/compiling need to be within `strategy.scope()`.\n", - " multi_worker_model = mnist.build_and_compile_cnn_model()\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", "\n", "\n", "multi_worker_model.fit(multi_worker_dataset, epochs=3, steps_per_epoch=70)" @@ -584,7 +601,7 @@ "id": "qmEEStPS6vR_" }, "source": [ - "So json-serialize the `TF_CONFIG` and add it to the environment variables:" + "Serialize the `TF_CONFIG` to JSON and add it to the environment variables:" ] }, { @@ -686,7 +703,7 @@ "id": "RqZhVF7L_KOy" }, "source": [ - "The last line of the log file should say: `Started server with target: grpc://localhost:12345`. The first worker is now ready, and is waiting for all the other worker(s) to be ready to proceed." + "The last line of the log file should say: `Started server with target: grpc://localhost:12345`. The first worker is now ready and is waiting for all the other worker(s) to be ready to proceed." ] }, { @@ -758,11 +775,7 @@ "id": "zL79ak5PMzEg" }, "source": [ - "Unsurprisingly, this ran _slower_ than the test run at the beginning of this tutorial.\n", - "\n", - "Running multiple workers on a single machine only adds overhead.\n", - "\n", - "The goal here was not to improve the training time, but only to give an example of multi-worker training." 
+ "Note: This may run slower than the test run at the beginning of this tutorial because running multiple workers on a single machine only adds overhead. The goal here is not to improve the training time but to give an example of multi-worker training.\n" ] }, { @@ -784,11 +797,16 @@ "id": "9j2FJVHoUIrE" }, "source": [ - "## Multi-worker training in depth\n", - "\n", - "So far, you have learned how to perform a basic multi-worker setup.\n", - "\n", - "During the rest of the tutorial, you will learn about other factors, which may be useful or important for real use cases, in detail." + "## Multi-worker training in depth\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C1hBks_dAZmT" + }, + "source": [ + "So far, you have learned how to perform a basic multi-worker setup. The rest of the tutorial goes over other factors, which may be useful or important for real use cases, in detail." ] }, { @@ -820,25 +838,41 @@ "options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF\n", "\n", "global_batch_size = 64\n", - "multi_worker_dataset = mnist.mnist_dataset(batch_size=64)\n", + "multi_worker_dataset = mnist_setup.mnist_dataset(batch_size=64)\n", "dataset_no_auto_shard = multi_worker_dataset.with_options(options)" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "z85hElxsBQsT" + }, + "source": [ + "### Evaluation" + ] + }, { "cell_type": "markdown", "metadata": { "id": "gmqvlh5LhAoU" }, "source": [ - "### Evaluation\n", - "\n", - "If you pass the `validation_data` into `Model.fit`, it will alternate between training and evaluation for each epoch. The evaluation taking the `validation_data` is distributed across the same set of workers and the evaluation results are aggregated and available for all workers.\n", + "If you pass the `validation_data` into `Model.fit` as well, it will alternate between training and evaluation for each epoch. The evaluation work is distributed across the same set of workers, and its results are aggregated and available to all workers.\n", "\n", "Similar to training, the validation dataset is automatically sharded at the file level. You need to set a global batch size in the validation dataset and set the `validation_steps`.\n", "\n", - "A repeated dataset is also recommended for evaluation.\n", + "A repeated dataset (by calling `tf.data.Dataset.repeat`) is recommended for evaluation.\n", "\n", - "Alternatively, you can also create another task that periodically reads checkpoints and runs the evaluation. This is what Estimator does. But this is not a recommended way to perform evaluation and thus its details are omitted." + "Alternatively, you can also create another task that periodically reads checkpoints and runs the evaluation. This is what an Estimator does. But this is not a recommended way to perform evaluation and thus its details are omitted." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FNkoxUPJBNTb" + }, + "source": [ + "### Performance" ] }, { @@ -847,25 +881,21 @@ "id": "XVk4ftYx6JAO" }, "source": [ - "### Performance\n", - "\n", - "You now have a Keras model that is all set up to run in multiple workers with the `MultiWorkerMirroredStrategy`.\n", - "\n", - "To tweak performance of multi-worker training, you can try the following:\n", + "To tweak the performance of multi-worker training, you can try the following:\n", "\n", "- `tf.distribute.MultiWorkerMirroredStrategy` provides multiple [collective communication implementations](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/CommunicationImplementation):\n", " - `RING` implements ring-based collectives using gRPC as the cross-host communication layer.\n", " - `NCCL` uses the [NVIDIA Collective Communication Library](https://developer.nvidia.com/nccl) to implement collectives.\n", " - `AUTO` defers the choice to the runtime.\n", " \n", - " The best choice of collective implementation depends upon the number of GPUs, the type of GPUs, and the network interconnect in the cluster. To override the automatic choice, specify the `communication_options` parameter of `MultiWorkerMirroredStrategy`'s constructor. For example:\n", + " The best choice of collective implementation depends upon the number of GPUs, the type of GPUs, and the network interconnects in the cluster. To override the automatic choice, specify the `communication_options` parameter of `MultiWorkerMirroredStrategy`'s constructor. For example:\n", " \n", " ```python\n", - " communication_options=tf.distribute.experimental.CommunicationOptions(implementation=tf.distribute.experimental.CollectiveCommunication.NCCL)\n", + " communication_options=tf.distribute.experimental.CommunicationOptions(implementation=tf.distribute.experimental.CommunicationImplementation.NCCL)\n", " ```\n", "\n", "- Cast the variables to `tf.float` if possible:\n", - " - The official ResNet model includes [an example](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466) of how this can be done." + " - The official ResNet model includes [an example](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466) of how to do this." ] }, { @@ -882,7 +912,7 @@ "\n", "When a worker becomes unavailable, other workers will fail (possibly after a timeout). In such cases, the unavailable worker needs to be restarted, as well as other workers that have failed.\n", "\n", - "Note: Previously, the `ModelCheckpoint` callback provided a mechanism to restore the training state upon a restart from a job failure for multi-worker training. The TensorFlow team are introducing a new [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback, to also add the support to single worker training for a consistent experience, and removed fault tolerance functionality from existing `ModelCheckpoint` callback. From now on, applications that rely on this behavior should migrate to the new callback." + "Note: Previously, the `ModelCheckpoint` callback provided a mechanism to restore the training state upon a restart from a job failure for multi-worker training. 
The TensorFlow team is introducing a new [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback, which also adds support for single-worker training for a consistent experience, and removes the fault tolerance functionality from the existing `ModelCheckpoint` callback. From now on, applications that rely on this behavior should migrate to the new `BackupAndRestore` callback." ] }, { @@ -891,13 +921,13 @@ "id": "KvHPjGlyyFt6" }, "source": [ -    "#### ModelCheckpoint callback\n", +    "#### The `ModelCheckpoint` callback\n", "\n", "`ModelCheckpoint` callback no longer provides fault tolerance functionality, please use [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback instead.\n", "\n", "The `ModelCheckpoint` callback can still be used to save checkpoints. But with this, if training was interrupted or successfully finished, in order to continue training from the checkpoint, the user is responsible to load the model manually.\n", "\n", -    "Optionally the user can choose to save and restore model/weights outside `ModelCheckpoint` callback." +    "Optionally, users can choose to save and restore model/weights outside the `ModelCheckpoint` callback." ] }, { @@ -919,14 +949,14 @@ "\n", "You should have some cleanup logic that deletes the temporary directories created by the workers once your training has completed.\n", "\n", -    "The reason for saving on the chief and workers at the same time is because you might be aggregating variables during checkpointing which requires both the chief and workers to participate in the allreduce communication protocol. On the other hand, letting chief and workers save to the same model directory will result in errors due to contention.\n", +    "The reason for saving on the chief and workers at the same time is that you might be aggregating variables during checkpointing, which requires both the chief and workers to participate in the allreduce communication protocol. On the other hand, letting chief and workers save to the same model directory will result in errors due to contention.\n", "\n", -    "Using the `MultiWorkerMirroredStrategy`, the program is run on every worker, and in order to know whether the current worker is chief, it takes advantage of the cluster resolver object that has attributes `task_type` and `task_id`:\n", -    "- `task_type` tells you what the current job is (e.g. `'worker'`).\n", +    "Using the `MultiWorkerMirroredStrategy`, the program is run on every worker, and in order to know whether the current worker is the chief, it takes advantage of the cluster resolver object that has attributes `task_type` and `task_id`:\n", +    "- `task_type` tells you what the current job is (for example, `'worker'`).\n", "- `task_id` tells you the identifier of the worker.\n", "- The worker with `task_id == 0` is designated as the chief worker.\n", "\n", -    "In the code snippet below, the `write_filepath` function provides the file path to write, which depends on the the worker's `task_id`:\n", +    "In the code snippet below, the `write_filepath` function provides the file path to write, which depends on the worker's `task_id`:\n", "\n", "- For the chief worker (with `task_id == 0`), it writes to the original file path. 
\n", "- For other workers, it creates a temporary directory—`temp_dir`—with the `task_id` in the directory path to write in:" @@ -943,14 +973,14 @@ "model_path = '/tmp/keras-model'\n", "\n", "def _is_chief(task_type, task_id):\n", - " # Note: there are two possible `TF_CONFIG` configuration.\n", + " # Note: there are two possible `TF_CONFIG` configurations.\n", " # 1) In addition to `worker` tasks, a `chief` task type is use;\n", " # in this case, this function should be modified to\n", " # `return task_type == 'chief'`.\n", " # 2) Only `worker` task type is used; in this case, worker 0 is\n", " # regarded as the chief. The implementation demonstrated here\n", " # is for this case.\n", - " # For the purpose of this Colab section, the `task_type is None` case\n", + " # For the purpose of this Colab section, the `task_type` is `None` case\n", " # is added because it is effectively run with only a single worker.\n", " return (task_type == 'worker' and task_id == 0) or task_type is None\n", "\n", @@ -981,6 +1011,15 @@ "With that, you're now ready to save:" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "XnToxeIcg_6O" + }, + "source": [ + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code." + ] + }, { "cell_type": "code", "execution_count": null, @@ -998,7 +1037,7 @@ "id": "8LXUVVl9_v5x" }, "source": [ - "As described above, later on the model should only be loaded from the path chief saved to, so let's remove the temporary ones the non-chief workers saved:" + "As described above, later on the model should only be loaded from the file path the chief worker saved to. Therefore, remove the temporary ones the non-chief workers have saved:" ] }, { @@ -1019,7 +1058,7 @@ "id": "Nr-2PKlHAPBT" }, "source": [ - "Now, when it's time to load, let's use convenient `tf.keras.models.load_model` API, and continue with further work.\n", + "Now, when it's time to load, use the convenient `tf.keras.models.load_model` API, and continue with further work.\n", "\n", "Here, assume only using single worker to load and continue training, in which case you do not call `tf.keras.models.load_model` within another `strategy.scope()` (note that `strategy = tf.distribute.MultiWorkerMirroredStrategy()`, as defined earlier):" ] @@ -1117,20 +1156,23 @@ "id": "kmH8uCUhfn4w" }, "source": [ - "#### BackupAndRestore callback\n", + "#### The `BackupAndRestore` callback\n", + "\n", + "The `tf.keras.callbacks.BackupAndRestore` callback provides the fault tolerance functionality by backing up the model and current training state in a temporary checkpoint file under `backup_dir` argument to `BackupAndRestore`. \n", "\n", - "The `tf.keras.callbacks.experimental.BackupAndRestore` callback provides the fault tolerance functionality by backing up the model and current epoch number in a temporary checkpoint file under `backup_dir` argument to `BackupAndRestore`. This is done at the end of each epoch.\n", + "Note: In Tensorflow 2.9, the current model and the training state is backed up at epoch boundaries. In the `tf-nightly` version and from TensorFlow 2.10, the `BackupAndRestore` callback can back up the model and the training state at epoch or step boundaries. `BackupAndRestore` accepts an optional `save_freq` argument. `save_freq` accepts either `'epoch'` or an `int` value. 
If `save_freq` is set to `'epoch'`, the model is backed up after every epoch. If `save_freq` is set to an integer value greater than `0`, the model is backed up after every `save_freq` number of batches.\n", "\n", -    "Once jobs get interrupted and restart, the callback restores the last checkpoint, and training continues from the beginning of the interrupted epoch. Any partial training already done in the unfinished epoch before interruption will be thrown away, so that it doesn't affect the final model state.\n", +    "Once the jobs get interrupted and restarted, the `BackupAndRestore` callback restores the last checkpoint, and you can continue training from the beginning of the epoch and step at which the training state was last saved.\n", "\n", -    "To use it, provide an instance of `tf.keras.callbacks.experimental.BackupAndRestore` at the `Model.fit` call.\n", +    "To use it, provide an instance of `tf.keras.callbacks.BackupAndRestore` at the `Model.fit` call.\n", "\n", -    "With `MultiWorkerMirroredStrategy`, if a worker gets interrupted, the whole cluster pauses until the interrupted worker is restarted. Other workers will also restart, and the interrupted worker rejoins the cluster. Then, every worker reads the checkpoint file that was previously saved and picks up its former state, thereby allowing the cluster to get back in sync. Then, the training continues.\n", +    "With `MultiWorkerMirroredStrategy`, if a worker gets interrupted, the whole cluster will pause until the interrupted worker is restarted. Other workers will also restart, and the interrupted worker will rejoin the cluster. Then, every worker will read the checkpoint file that was previously saved and pick up its former state, thereby allowing the cluster to get back in sync. Then, the training will continue. The distributed dataset iterator state will be re-initialized and not restored.\n", "\n", "The `BackupAndRestore` callback uses the `CheckpointManager` to save and restore the training state, which generates a file called checkpoint that tracks existing checkpoints together with the latest one. For this reason, `backup_dir` should not be re-used to store other checkpoints in order to avoid name collision.\n", "\n", -    "Currently, the `BackupAndRestore` callback supports single worker with no strategy, MirroredStrategy, and multi-worker with MultiWorkerMirroredStrategy.\n", -    "Below are two examples for both multi-worker training and single worker training." +    "Currently, the `BackupAndRestore` callback supports single-worker training with no strategy, with `MirroredStrategy`, and multi-worker training with `MultiWorkerMirroredStrategy`.\n", +    "\n", +    "Below are two examples for both multi-worker training and single-worker training:" ] }, { @@ -1141,12 +1183,73 @@ }, "outputs": [], "source": [ -    "# Multi-worker training with MultiWorkerMirroredStrategy\n", -    "# and the BackupAndRestore callback.\n", +    "# Multi-worker training with `MultiWorkerMirroredStrategy`\n", +    "# and the `BackupAndRestore` callback. 
The training state \n", + "# is backed up at epoch boundaries by default.\n", + "\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup')]\n", + "with strategy.scope():\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", + "multi_worker_model.fit(multi_worker_dataset,\n", + " epochs=3,\n", + " steps_per_epoch=70,\n", + " callbacks=callbacks)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f8e86TAp0Rsl" + }, + "source": [ + "If the `save_freq` argument in the `BackupAndRestore` callback is set to `'epoch'`, the model is backed up after every epoch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rZjQGPsF0aEI" + }, + "outputs": [], + "source": [ + "# The training state is backed up at epoch boundaries because `save_freq` is\n", + "# set to `epoch`.\n", + "\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup')]\n", + "with strategy.scope():\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", + "multi_worker_model.fit(multi_worker_dataset,\n", + " epochs=3,\n", + " steps_per_epoch=70,\n", + " callbacks=callbacks)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p-r44kCM0jc6" + }, + "source": [ + "Note: The next code block uses features that are only available in `tf-nightly` until Tensorflow 2.10 is released.\n", + "\n", + "If the `save_freq` argument in the `BackupAndRestore` callback is set to an integer value greater than `0`, the model is backed up after every `save_freq` number of batches." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bSJUyLSF0moC" + }, + "outputs": [], + "source": [ + "# The training state is backed up at every 30 steps because `save_freq` is set\n", + "# to an integer value of `30`.\n", "\n", - "callbacks = [tf.keras.callbacks.experimental.BackupAndRestore(backup_dir='/tmp/backup')]\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup', save_freq=30)]\n", "with strategy.scope():\n", - " multi_worker_model = mnist.build_and_compile_cnn_model()\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", "multi_worker_model.fit(multi_worker_dataset,\n", " epochs=3,\n", " steps_per_epoch=70,\n", @@ -1161,7 +1264,7 @@ "source": [ "If you inspect the directory of `backup_dir` you specified in `BackupAndRestore`, you may notice some temporarily generated checkpoint files. Those files are needed for recovering the previously lost instances, and they will be removed by the library at the end of `Model.fit` upon successful exiting of your training.\n", "\n", - "Note: Currently the `BackupAndRestore` callback only supports eager mode. In graph mode, consider using [Save/Restore Model](#model_saving_and_loading) mentioned above, and by providing `initial_epoch` in `Model.fit`." + "Note: Currently the `BackupAndRestore` callback only supports eager mode. In graph mode, consider using `Model.save`/`tf.saved_model.save` and `tf.keras.models.load_model` for saving and restoring models, respectively, as described in the _Model saving and loading_ section above, and by providing `initial_epoch` in `Model.fit` during training." ] }, { @@ -1172,7 +1275,7 @@ "source": [ "## Additional resources\n", "\n", - "1. The [Distributed training in TensorFlow](https://www.tensorflow.org/guide/distributed_training) guide provides an overview of the available distribution strategies.\n", + "1. 
The [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide provides an overview of the available distribution strategies.\n", "1. The [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb) tutorial shows how to use the `MultiWorkerMirroredStrategy` with Keras and a custom training loop.\n", "1. Check out the [official models](https://github.com/tensorflow/models/tree/master/official), many of which can be configured to run multiple distribution strategies.\n", "1. The [Better performance with tf.function](../../guide/function.ipynb) guide provides information about other strategies and tools, such as the [TensorFlow Profiler](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models." @@ -1181,9 +1284,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "multi_worker_with_keras.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/parameter_server_training.ipynb b/site/en/tutorials/distribute/parameter_server_training.ipynb index fae0a2d3576..2e6bb0cfce2 100644 --- a/site/en/tutorials/distribute/parameter_server_training.ipynb +++ b/site/en/tutorials/distribute/parameter_server_training.ipynb @@ -74,7 +74,7 @@ "\n", "A parameter server training cluster consists of _workers_ and _parameter servers_. Variables are created on parameter servers and they are read and updated by workers in each step. By default, workers read and update these variables independently without synchronizing with each other. This is why sometimes parameter server-style training is called _asynchronous training_.\n", "\n", - "In TensorFlow 2, parameter server training is powered by the `tf.distribute.experimental.ParameterServerStrategy` class, which distributes the training steps to a cluster that scales up to thousands of workers (accompanied by parameter servers)." + "In TensorFlow 2, parameter server training is powered by the `tf.distribute.ParameterServerStrategy` class, which distributes the training steps to a cluster that scales up to thousands of workers (accompanied by parameter servers)." ] }, { @@ -87,9 +87,9 @@ "\n", "There are two main supported training methods:\n", "\n", - "- The Keras `Model.fit` API, which is recommended when you prefer a high-level abstraction and handling of training.\n", - "- A custom training loop (you can refer to [Custom training](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough#train_the_model), [Writing a training loop from scratch\n", - "](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Custom training loop with Keras and MultiWorkerMirroredStrategy](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_ctl) for more details.) Custom loop training is recommended when you prefer to define the details of their training loop." + "- The Keras `Model.fit` API: if you prefer a high-level abstraction and handling of training. 
This is generally recommended if you are training a `tf.keras.Model`.\n", + "- A custom training loop: if you prefer to define the details of your training loop (you can refer to guides on [Custom training](../customization/custom_training_walkthrough.ipynb), [Writing a training loop from scratch\n", + "](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb) for more details)." ] }, { @@ -100,15 +100,15 @@ "source": [ "### A cluster with jobs and tasks\n", "\n", - "Regardless of the API of choice (`Model.fit` or a custom training loop), distributed training in TensorFlow 2 involves: a `'cluster'` with several `'jobs'`, and each of the jobs may have one or more `'tasks'`.\n", + "Regardless of the API of choice (`Model.fit` or a custom training loop), distributed training in TensorFlow 2 involves a `'cluster'` with several `'jobs'`, and each of the jobs may have one or more `'tasks'`.\n", "\n", "When using parameter server training, it is recommended to have:\n", "\n", "- One _coordinator_ job (which has the job name `chief`)\n", - "- Multiple _worker_ jobs (job name `worker`); and\n", + "- Multiple _worker_ jobs (job name `worker`)\n", "- Multiple _parameter server_ jobs (job name `ps`)\n", "\n", - "While the _coordinator_ creates resources, dispatches training tasks, writes checkpoints, and deals with task failures, _workers_ and _parameter servers_ run `tf.distribute.Server` that listen for requests from the coordinator." + "The _coordinator_ creates resources, dispatches training tasks, writes checkpoints, and deals with task failures. The _workers_ and _parameter servers_ run `tf.distribute.Server` instances that listen for requests from the coordinator." ] }, { @@ -117,10 +117,9 @@ "id": "oLV1FbpLtqtB" }, "source": [ - "### Parameter server training with `Model.fit` API\n", + "### Parameter server training with the `Model.fit` API\n", "\n", - "Parameter server training with the `Model.fit` API requires the coordinator to use a `tf.distribute.experimental.ParameterServerStrategy` object, and a `tf.keras.utils.experimental.DatasetCreator` as the input. Similar to `Model.fit` usage with no strategy, or with other strategies, the workflow involves creating and compiling the model, preparing the callbacks, followed by\n", - "a `Model.fit` call." + "Parameter server training with the `Model.fit` API requires the coordinator to use a `tf.distribute.ParameterServerStrategy` object. Similar to `Model.fit` usage with no strategy, or with other strategies, the workflow involves creating and compiling the model, preparing the callbacks, and calling `Model.fit`." 
] }, { @@ -131,12 +130,11 @@ "source": [ "### Parameter server training with a custom training loop\n", "\n", - "With custom training loops, the `tf.distribute.experimental.coordinator.ClusterCoordinator` class is the key component used for the coordinator.\n", + "With custom training loops, the `tf.distribute.coordinator.ClusterCoordinator` class is the key component used for the coordinator.\n", "\n", - "- The `ClusterCoordinator` class needs to work in conjunction with a `tf.distribute.Strategy` object.\n", - "- This `tf.distribute.Strategy` object is needed to provide the information of the cluster and is used to define a training step, as demonstrated in [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training#training_loop).\n", + "- The `ClusterCoordinator` class needs to work in conjunction with a `tf.distribute.ParameterServerStrategy` object.\n", + "- This `tf.distribute.Strategy` object is needed to provide the information of the cluster and is used to define a training step, as demonstrated in [Custom training with tf.distribute.Strategy](custom_training.ipynb).\n", "- The `ClusterCoordinator` object then dispatches the execution of these training steps to remote workers.\n", - "- For parameter server training, the `ClusterCoordinator` needs to work with a `tf.distribute.experimental.ParameterServerStrategy`.\n", "\n", "The most important API provided by the `ClusterCoordinator` object is `schedule`:\n", "\n", @@ -144,7 +142,7 @@ "- The queued functions will be dispatched to remote workers in background threads and their `RemoteValue`s will be filled asynchronously.\n", "- Since `schedule` doesn’t require worker assignment, the `tf.function` passed in can be executed on any available worker.\n", "- If the worker it is executed on becomes unavailable before its completion, the function will be retried on another available worker.\n", - "- Because of this fact and the fact that function execution is not atomic, a function may be executed more than once.\n", + "- Because of this fact and the fact that function execution is not atomic, a single function call may be executed more than once.\n", "\n", "In addition to dispatching remote functions, the `ClusterCoordinator` also helps\n", "to create datasets on all the workers and rebuild these datasets when a worker recovers from failure." @@ -169,9 +167,7 @@ }, "outputs": [], "source": [ - "!pip install portpicker\n", - "!pip uninstall tensorflow keras -y\n", - "!pip install tf-nightly" + "!pip install portpicker" ] }, { @@ -187,8 +183,7 @@ "import os\n", "import random\n", "import portpicker\n", - "import tensorflow as tf\n", - "from tensorflow.keras.layers.experimental import preprocessing" + "import tensorflow as tf" ] }, { @@ -199,9 +194,9 @@ "source": [ "## Cluster setup\n", "\n", - "As mentioned above, a parameter server training cluster requires a coordinator task that runs your training program, one or several workers and parameter server tasks that run TensorFlow servers—`tf.distribute.Server`—and possibly an additional evaluation task that runs side-car evaluation (see the side-car evaluation section below). 
The requirements to set them up are:\n", + "As mentioned above, a parameter server training cluster requires a coordinator task that runs your training program, one or several workers and parameter server tasks that run TensorFlow servers—`tf.distribute.Server`—and possibly an additional evaluation task that runs sidecar evaluation (refer to the [sidecar evaluation section](#sidecar_evaluation) below). The requirements to set them up are:\n", "\n", - "- The coordinator task needs to know the addresses and ports of all other TensorFlow servers except the evaluator.\n", + "- The coordinator task needs to know the addresses and ports of all other TensorFlow servers, except the evaluator.\n", "- The workers and parameter servers need to know which port they need to listen to. For the sake of simplicity, you can usually pass in the complete cluster information when creating TensorFlow servers on these tasks.\n", "- The evaluator task doesn’t have to know the setup of the training cluster. If it does, it should not attempt to connect to the training cluster.\n", "- Workers and parameter servers should have task types as `\"worker\"` and `\"ps\"`, respectively. The coordinator should use `\"chief\"` as the task type for legacy reasons.\n", @@ -217,7 +212,7 @@ "source": [ "### In-process cluster\n", "\n", - "You will start by creating several TensorFlow servers in advance and connect to them later. Note that this is only for the purpose of this tutorial's demonstration, and in real training the servers will be started on `\"worker\"` and `\"ps\"` machines." + "You will start by creating several TensorFlow servers in advance and you will connect to them later. Note that this is only for the purpose of this tutorial's demonstration, and in real training the servers will be started on `\"worker\"` and `\"ps\"` machines." ] }, { @@ -279,9 +274,9 @@ "id": "pX_91OByt0J2" }, "source": [ - "The in-process cluster setup is frequently used in unit testing, such as [here](https://github.com/tensorflow/tensorflow/blob/7621d31921c2ed979f212da066631ddfda37adf5/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py#L437).\n", + "The in-process cluster setup is frequently used in unit testing, such as [here](https://github.com/tensorflow/tensorflow/blob/eb4c40fc91da260199fa2aed6fe67d36ad49fafd/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py#L447).\n", "\n", - "Another option for local testing is to launch processes on the local machine—check out [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) for an example of this approach." + "Another option for local testing is to launch processes on the local machine—check out [Multi-worker training with Keras](multi_worker_with_keras.ipynb) for an example of this approach." ] }, { @@ -292,7 +287,7 @@ "source": [ "## Instantiate a ParameterServerStrategy\n", "\n", - "Before you dive into the training code, let's instantiate a `ParameterServerStrategy` object. Note that this is needed regardless of whether you are proceeding with `Model.fit` or a custom training loop. The `variable_partitioner` argument will be explained in the [Variable sharding section](#variable-sharding)." + "Before you dive into the training code, let's instantiate a `tf.distribute.ParameterServerStrategy` object. Note that this is needed regardless of whether you are proceeding with `Model.fit` or a custom training loop. 
The `variable_partitioner` argument will be explained in the [Variable sharding section](#variable_sharding)." ] }, { @@ -308,7 +303,7 @@ " min_shard_bytes=(256 << 10),\n", " max_shards=NUM_PS))\n", "\n", - "strategy = tf.distribute.experimental.ParameterServerStrategy(\n", + "strategy = tf.distribute.ParameterServerStrategy(\n", " cluster_resolver,\n", " variable_partitioner=variable_partitioner)" ] @@ -331,7 +326,8 @@ "### Variable sharding\n", "\n", "Variable sharding refers to splitting a variable into multiple smaller\n", - "variables, which are called _shards_. Variable sharding may be useful to distribute the network load when accessing these shards. It is also useful to distribute computation and storage of a normal variable across multiple parameter servers.\n", + "variables, which are called _shards_. Variable sharding may be useful to distribute the network load when accessing these shards. It is also useful to distribute computation and storage of a normal variable across multiple parameter servers, for example, when using very large embeddings\n", + "that may not fit in a single machine's memory.\n", "\n", "To enable variable sharding, you can pass in a `variable_partitioner` when\n", "constructing a `ParameterServerStrategy` object. The `variable_partitioner` will\n", @@ -340,7 +336,7 @@ "`variable_partitioner`s are provided such as\n", "`tf.distribute.experimental.partitioners.MinSizePartitioner`. It is recommended to use size-based partitioners like\n", "`tf.distribute.experimental.partitioners.MinSizePartitioner` to avoid\n", - "partitioning small variables, which could have negative impact on model training\n", + "partitioning small variables, which could have a negative impact on model training\n", "speed." ] }, @@ -350,16 +346,16 @@ "id": "1--SxlxtsOb7" }, "source": [ - "When a `variable_partitioner` is passed in and if you create a variable directly\n", - "under `strategy.scope()`, it will become a container type with a `variables`\n", - "property which provides access to the list of shards. In most cases, this\n", + "When a `variable_partitioner` is passed in, and you create a variable directly\n", + "under `Strategy.scope`, the variable will become a container type with a `variables`\n", + "property, which provides access to the list of shards. In most cases, this\n", "container will be automatically converted to a Tensor by concatenating all the\n", "shards. As a result, it can be used as a normal variable. On the other hand,\n", "some TensorFlow methods such as `tf.nn.embedding_lookup` provide efficient\n", "implementation for this container type and in these methods automatic\n", "concatenation will be avoided.\n", "\n", - "Please see the API docs of `tf.distribute.experimental.ParameterServerStrategy` for more details." + "Refer to the API docs of `tf.distribute.ParameterServerStrategy` for more details." ] }, { @@ -371,7 +367,7 @@ "## Training with `Model.fit`\n", "\n", "\n", - "Keras provides an easy-to-use training API via `Model.fit` that handles the training loop under the hood, with the flexibility of overridable `train_step`, and callbacks, which provide functionalities such as checkpoint saving or summary saving for TensorBoard. With `Model.fit`, the same training code can be used for other strategies with a simple swap of the strategy object." 
+ "Keras provides an easy-to-use training API via `Model.fit` that handles the training loop under the hood, with the flexibility of an overridable `train_step`, and callbacks which provide functionalities such as checkpoint saving or summary saving for TensorBoard. With `Model.fit`, the same training code can be used with other strategies with a simple swap of the strategy object." ] }, { @@ -382,12 +378,14 @@ "source": [ "### Input data\n", "\n", - "`Model.fit` with parameter server training requires that the input data be\n", - "provided in a callable that takes a single argument of type `tf.distribute.InputContext`, and returns a `tf.data.Dataset`. Then, create a `tf.keras.utils.experimental.DatasetCreator` object that takes such `callable`, and an optional `tf.distribute.InputOptions` object via `input_options` argument.\n", + "Keras `Model.fit` with `tf.distribute.ParameterServerStrategy` can take input data in the form of a `tf.data.Dataset`, `tf.distribute.DistributedDataset`, or a `tf.keras.utils.experimental.DatasetCreator`, with `Dataset` being the recommended option for ease of use. If you encounter memory issues using `Dataset`, however, you may need to use `DatasetCreator` with a callable `dataset_fn` argument (refer to the `tf.keras.utils.experimental.DatasetCreator` API documentation for details).\n", "\n", - "Note that it is recommended to shuffle and repeat the data with parameter server training, and specify `steps_per_epoch` in `fit` call so the library knows the epoch boundaries.\n", + "If you transform your dataset into a `tf.data.Dataset`, you should use `Dataset.shuffle` and `Dataset.repeat`, as demonstrated in the code example below.\n", "\n", - "Please see the [Distributed input](https://www.tensorflow.org/tutorials/distribute/input#usage_2) tutorial for more information about the `InputContext` argument." + "- Keras `Model.fit` with parameter server training assumes that each worker receives the same dataset, except when it is shuffled differently. Therefore, by calling `Dataset.shuffle`, you ensure more even iterations over the data.\n", + "- Because workers do not synchronize, they may finish processing their datasets at different times. Therefore, the easiest way to define epochs with parameter server training is to use `Dataset.repeat`—which repeats a dataset indefinitely when called without an argument—and specify the `steps_per_epoch` argument in the `Model.fit` call.\n", + "\n", + "Refer to the \"Training workflows\" section of the [tf.data guide](../../guide/data.ipynb) for more details on `shuffle` and `repeat`." 
] }, { @@ -398,23 +396,14 @@ }, "outputs": [], "source": [ - "def dataset_fn(input_context):\n", - " global_batch_size = 64\n", - " batch_size = input_context.get_per_replica_batch_size(global_batch_size)\n", - "\n", - " x = tf.random.uniform((10, 10))\n", - " y = tf.random.uniform((10,))\n", - "\n", - " dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat()\n", - " dataset = dataset.shard(\n", - " input_context.num_input_pipelines,\n", - " input_context.input_pipeline_id)\n", - " dataset = dataset.batch(batch_size)\n", - " dataset = dataset.prefetch(2)\n", + "global_batch_size = 64\n", "\n", - " return dataset\n", + "x = tf.random.uniform((10, 10))\n", + "y = tf.random.uniform((10,))\n", "\n", - "dc = tf.keras.utils.experimental.DatasetCreator(dataset_fn)" + "dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat()\n", + "dataset = dataset.batch(global_batch_size)\n", + "dataset = dataset.prefetch(2)" ] }, { @@ -423,11 +412,18 @@ "id": "v_jhF70K7zON" }, "source": [ - "The code in `dataset_fn` will be invoked on the input device, which is usually the CPU, on each of the worker machines.\n", - "\n", + "If you instead create your dataset with `tf.keras.utils.experimental.DatasetCreator`, the code in `dataset_fn` will be invoked on the input device, which is usually the CPU, on each of the worker machines.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w60PuWrWwBD4" + }, + "source": [ "### Model construction and compiling\n", "\n", - "Now, you will create a `tf.keras.Model`—a trivial `tf.keras.models.Sequential` model for demonstration purposes—followed by a `Model.compile` call to incorporate components, such as an optimizer, metrics, or parameters such as `steps_per_execution`:" + "Now, you will create a `tf.keras.Model`—a trivial `tf.keras.models.Sequential` model for demonstration purposes—followed by a `Model.compile` call to incorporate components, such as an optimizer, metrics, and other parameters such as `steps_per_execution`:" ] }, { @@ -441,7 +437,7 @@ "with strategy.scope():\n", " model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])\n", "\n", - "model.compile(tf.keras.optimizers.SGD(), loss='mse', steps_per_execution=10)" + " model.compile(tf.keras.optimizers.legacy.SGD(), loss=\"mse\", steps_per_execution=10)" ] }, { @@ -454,13 +450,13 @@ "\n", " \n", "\n", - "Before you call `model.fit` for the actual training, let's prepare the needed callbacks for common tasks, such as:\n", + "Before you call Keras `Model.fit` for the actual training, prepare any needed [callbacks](https://www.tensorflow.org/guide/keras/train_and_evaluate) for common tasks, such as:\n", "\n", - "- `ModelCheckpoint`: to save the model weights.\n", - "- `BackupAndRestore`: to make sure the training progress is automatically backed up, and recovered if the cluster experiences unavailability (such as abort or preemption); or\n", - "- `TensorBoard`: to save the progress reports into summary files, which get visualized in TensorBoard tool.\n", + "- `tf.keras.callbacks.ModelCheckpoint`: saves the model at a certain frequency, such as after every epoch.\n", + "- `tf.keras.callbacks.BackupAndRestore`: provides fault tolerance by backing up the model and current epoch number, if the cluster experiences unavailability (such as abort or preemption). 
You can then restore the training state upon a restart from a job failure, and continue training from the beginning of the interrupted epoch.\n", + "- `tf.keras.callbacks.TensorBoard`: periodically writes model logs in summary files that can be visualized in the TensorBoard tool.\n", "\n", - "Note: Due to performance consideration, custom callbacks cannot have batch level callbacks overridden when used with `ParameterServerStrategy`. Please modify your custom callbacks to make them epoch level calls, and adjust `steps_per_epoch` to a suitable value. In addition, `steps_per_epoch` is a required argument for `Model.fit` when used with `ParameterServerStrategy`." + "Note: Due to performance considerations, custom callbacks cannot have batch level callbacks overridden when used with `ParameterServerStrategy`. Please modify your custom callbacks to make them epoch level calls, and adjust `steps_per_epoch` to a suitable value. In addition, `steps_per_epoch` is a required argument for `Model.fit` when used with `ParameterServerStrategy`." ] }, { @@ -471,18 +467,18 @@ }, "outputs": [], "source": [ - "working_dir = '/tmp/my_working_dir'\n", - "log_dir = os.path.join(working_dir, 'log')\n", - "ckpt_filepath = os.path.join(working_dir, 'ckpt')\n", - "backup_dir = os.path.join(working_dir, 'backup')\n", + "working_dir = \"/tmp/my_working_dir\"\n", + "log_dir = os.path.join(working_dir, \"log\")\n", + "ckpt_filepath = os.path.join(working_dir, \"ckpt\")\n", + "backup_dir = os.path.join(working_dir, \"backup\")\n", "\n", "callbacks = [\n", " tf.keras.callbacks.TensorBoard(log_dir=log_dir),\n", " tf.keras.callbacks.ModelCheckpoint(filepath=ckpt_filepath),\n", - " tf.keras.callbacks.experimental.BackupAndRestore(backup_dir=backup_dir),\n", + " tf.keras.callbacks.BackupAndRestore(backup_dir=backup_dir),\n", "]\n", "\n", - "model.fit(dc, epochs=5, steps_per_epoch=20, callbacks=callbacks)" + "model.fit(dataset, epochs=5, steps_per_epoch=20, callbacks=callbacks)" ] }, { @@ -493,7 +489,7 @@ "source": [ "### Direct usage with `ClusterCoordinator` (optional)\n", "\n", - "Even if you choose the `Model.fit` training path, you can optionally instantiate a `tf.distribute.experimental.coordinator.ClusterCoordinator` object to schedule other functions you would like to be executed on the workers. See the [Training with a custom training loop](#training_with_custom_training_loop) section for more details and examples." + "Even if you choose the `Model.fit` training path, you can optionally instantiate a `tf.distribute.coordinator.ClusterCoordinator` object to schedule other functions you would like to be executed on the workers. Refer to the [Training with a custom training loop](#training_with_custom_training_loop) section for more details and examples." ] }, { @@ -506,11 +502,11 @@ "\n", " \n", "\n", - "Using custom training loops with `tf.distribute.Strategy` provides great flexibility to define training loops. With the `ParameterServerStrategy` defined above (as `strategy`), you will use a `tf.distribute.experimental.coordinator.ClusterCoordinator` to dispatch the execution of training steps to remote workers.\n", + "Using custom training loops with `tf.distribute.Strategy` provides great flexibility to define training loops. 
With the `ParameterServerStrategy` defined above (as `strategy`), you will use a `tf.distribute.coordinator.ClusterCoordinator` to dispatch the execution of training steps to remote workers.\n", "\n", - "Then, you will create a model, define a dataset and a step function, as you have done in the training loop with other `tf.distribute.Strategy`s. You can find more details in the [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training) tutorial.\n", + "Then, you will create a model, define a dataset, and define a step function, as you have done in the training loop with other `tf.distribute.Strategy`s. You can find more details in the [Custom training with tf.distribute.Strategy](custom_training.ipynb) tutorial.\n", "\n", - "To ensure efficient dataset prefetching, use the recommended distributed dataset creation APIs mentioned in the [Dispatch training steps to remote workers](https://www.tensorflow.org/tutorials/distribute/parameter_server_training#dispatch_training_steps_to_remote_workers) section below. Also, make sure to call `Strategy.run` inside `worker_fn` to take full advantage of GPUs allocated to workers. The rest of the steps are the same for training with or without GPUs.\n", + "To ensure efficient dataset prefetching, use the recommended distributed dataset creation APIs mentioned in the [Dispatch training steps to remote workers](#dispatch_training_steps_to_remote_workers) section below. Also, make sure to call `Strategy.run` inside `worker_fn` to take full advantage of GPUs allocated to workers. The rest of the steps are the same for training with or without GPUs.\n", "\n", "Let’s create these components in the following steps:\n" ] @@ -523,11 +519,13 @@ "source": [ "### Set up the data\n", "\n", - "First, write a function that creates a dataset that includes preprocessing logic implemented by [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers).\n", + "First, write a function that creates a dataset.\n", + "\n", + "If you would like to preprocess the data with [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) or [Tensorflow Transform layers](https://www.tensorflow.org/tfx/tutorials/transform/simple), create these layers **outside the `dataset_fn`** and **under `Strategy.scope`**, like you would do for any other Keras layers. This is because the `dataset_fn` will be wrapped into a `tf.function` and then executed on each worker to generate the data pipeline.\n", "\n", - "You will create these layers outside the `dataset_fn` but apply the transformation inside the `dataset_fn`, since you will wrap the `dataset_fn` into a `tf.function`, which doesn't allow variables to be created inside it.\n", + "If you don't follow the above procedure, creating the layers might create Tensorflow states which will be lifted out of the `tf.function` to the coordinator. Thus, accessing them on workers would incur repetitive RPC calls between coordinator and workers, and cause significant slowdown.\n", "\n", - "Note: There is a known performance implication when using lookup table resources, which layers, such as `tf.keras.layers.experimental.preprocessing.StringLookup`, employ. Refer to the [Known limitations](#known_limitations) section for more information." + "Placing the layers under `Strategy.scope` will instead create them on all workers. Then, you will apply the transformation inside the `dataset_fn` via `tf.data.Dataset.map`. 
Refer to _Data preprocessing_ in the [Distributed input](input.ipynb) tutorial for more information on data preprocessing with distributed input." ] }, { @@ -544,10 +542,10 @@ "label_vocab = [\"yes\", \"no\"]\n", "\n", "with strategy.scope():\n", - " feature_lookup_layer = preprocessing.StringLookup(\n", + " feature_lookup_layer = tf.keras.layers.StringLookup(\n", " vocabulary=feature_vocab,\n", " mask_token=None)\n", - " label_lookup_layer = preprocessing.StringLookup(\n", + " label_lookup_layer = tf.keras.layers.StringLookup(\n", " vocabulary=label_vocab,\n", " num_oov_indices=0,\n", " mask_token=None)\n", @@ -637,7 +635,7 @@ "source": [ "### Build the model\n", "\n", - "Next, create the model and other objects. Make sure to create all variables under `strategy.scope`." + "Next, create the model and other objects. Make sure to create all variables under `Strategy.scope`." ] }, { @@ -648,7 +646,7 @@ }, "outputs": [], "source": [ - "# These variables created under the `strategy.scope` will be placed on parameter\n", + "# These variables created under the `Strategy.scope` will be placed on parameter\n", "# servers in a round-robin fashion.\n", "with strategy.scope():\n", " # Create the model. The input needs to be compatible with Keras processing layers.\n", @@ -658,10 +656,13 @@ " emb_layer = tf.keras.layers.Embedding(\n", " input_dim=len(feature_lookup_layer.get_vocabulary()), output_dim=16384)\n", " emb_output = tf.reduce_mean(emb_layer(model_input), axis=1)\n", - " dense_output = tf.keras.layers.Dense(units=1, activation=\"sigmoid\")(emb_output)\n", + " dense_output = tf.keras.layers.Dense(\n", + " units=1, activation=\"sigmoid\",\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4),\n", + " )(emb_output)\n", " model = tf.keras.Model({\"features\": model_input}, dense_output)\n", "\n", - " optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)\n", + " optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.1)\n", " accuracy = tf.keras.metrics.Accuracy()" ] }, @@ -671,7 +672,7 @@ "id": "iyuxiqCQU50m" }, "source": [ - "Let's confirm that the use of `FixedShardsPartitioner` split all variables into two shards and each shard was assigned to different parameter servers:" + "Let's confirm that the use of `FixedShardsPartitioner` split all variables into two shards and that each shard was assigned to a different parameter server:" ] }, { @@ -685,8 +686,9 @@ "assert len(emb_layer.weights) == 2\n", "assert emb_layer.weights[0].shape == (4, 16384)\n", "assert emb_layer.weights[1].shape == (4, 16384)\n", - "assert emb_layer.weights[0].device == \"/job:ps/replica:0/task:0/device:CPU:0\"\n", - "assert emb_layer.weights[1].device == \"/job:ps/replica:0/task:1/device:CPU:0\"" + "\n", + "print(emb_layer.weights[0].device)\n", + "print(emb_layer.weights[1].device)\n" ] }, { @@ -714,9 +716,12 @@ " with tf.GradientTape() as tape:\n", " pred = model(batch_data, training=True)\n", " per_example_loss = tf.keras.losses.BinaryCrossentropy(\n", - " reduction=tf.keras.losses.Reduction.NONE)(labels, pred)\n", + " reduction=tf.keras.losses.Reduction.NONE)(labels, pred)\n", " loss = tf.nn.compute_average_loss(per_example_loss)\n", - " gradients = tape.gradient(loss, model.trainable_variables)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + " gradients = tape.gradient(loss, model.trainable_variables)\n", "\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", "\n", @@ -735,7 +740,7 @@ 
"id": "rvrYQUeYiLNy" }, "source": [ - "In the above training step function, calling `Strategy.run` and `Strategy.reduce` in the `step_fn` can support multiple GPUs per worker. If the workers have GPUs allocated, `Strategy.run` will distribute the datasets on multiple replicas.\n" + "In the above training step function, calling `Strategy.run` and `Strategy.reduce` in the `step_fn` can support multiple GPUs per worker. If the workers have GPUs allocated, `Strategy.run` will distribute the datasets on multiple replicas (GPUs). Their parallel calls to `tf.nn.compute_average_loss()` compute the average of the loss across the replicas (GPUs) of one worker, independent of the total number of workers." ] }, { @@ -747,7 +752,7 @@ "### Dispatch training steps to remote workers\n", " \n", "\n", - "After all the computations are defined by `ParameterServerStrategy`, you will use the `tf.distribute.experimental.coordinator.ClusterCoordinator` class to create resources and distribute the training steps to remote workers.\n", + "After all the computations are defined by `ParameterServerStrategy`, you will use the `tf.distribute.coordinator.ClusterCoordinator` class to create resources and distribute the training steps to remote workers.\n", "\n", "Let’s first create a `ClusterCoordinator` object and pass in the strategy object:" ] @@ -760,7 +765,7 @@ }, "outputs": [], "source": [ - "coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator(strategy)" + "coordinator = tf.distribute.coordinator.ClusterCoordinator(strategy)" ] }, { @@ -769,7 +774,7 @@ "id": "-xRIgKxciOSe" }, "source": [ - "Then, create a per-worker dataset and an iterator. In the `per_worker_dataset_fn` below, wrapping the `dataset_fn` into `strategy.distribute_datasets_from_function` is recommended to allow efficient prefetching to GPUs seamlessly." + "Then, create a per-worker dataset and an iterator using the `ClusterCoordinator.create_per_worker_dataset` API, which replicates the dataset to all workers. In the `per_worker_dataset_fn` below, wrapping the `dataset_fn` into `strategy.distribute_datasets_from_function` is recommended to allow efficient prefetching to GPUs seamlessly." 
] }, { @@ -808,15 +813,15 @@ }, "outputs": [], "source": [ - "num_epoches = 4\n", + "num_epochs = 4\n", "steps_per_epoch = 5\n", - "for i in range(num_epoches):\n", + "for i in range(num_epochs):\n", " accuracy.reset_states()\n", " for _ in range(steps_per_epoch):\n", " coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", " # Wait at epoch boundaries.\n", " coordinator.join()\n", - " print (\"Finished epoch %d, accuracy is %f.\" % (i, accuracy.result().numpy()))" + " print(\"Finished epoch %d, accuracy is %f.\" % (i, accuracy.result().numpy()))" ] }, { @@ -837,7 +842,7 @@ "outputs": [], "source": [ "loss = coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", - "print (\"Final loss is %f\" % loss.fetch())" + "print(\"Final loss is %f\" % loss.fetch())" ] }, { @@ -857,7 +862,7 @@ " # Do something like logging metrics or writing checkpoints.\n", "```\n", "\n", - "For the complete training and serving workflow for this particular example, please check out this [test](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/distribute/parameter_server_training_test.py).\n" + "For the complete training and serving workflow for this particular example, please check out this [test](https://github.com/keras-team/keras/blob/master/keras/integration_test/parameter_server_keras_preprocessing_test.py).\n" ] }, { @@ -868,11 +873,11 @@ "source": [ "### More about dataset creation\n", "\n", - "The dataset in the above code is created using the `ClusterCoordinator.create_per_worker_dataset` API). It creates one dataset per worker and returns a container object. You can call the `iter` method on it to create a per-worker iterator. The per-worker iterator contains one iterator per worker and the corresponding slice of a worker will be substituted in the input argument of the function passed to the `ClusterCoordinator.schedule` method before the function is executed on a particular worker.\n", + "The dataset in the above code is created using the `ClusterCoordinator.create_per_worker_dataset` API. It creates one dataset per worker and returns a container object. You can call the `iter` method on it to create a per-worker iterator. The per-worker iterator contains one iterator per worker and the corresponding slice of a worker will be substituted in the input argument of the function passed to the `ClusterCoordinator.schedule` method before the function is executed on a particular worker.\n", "\n", - "Currently, the `ClusterCoordinator.schedule` method assumes workers are equivalent and thus assumes the datasets on different workers are the same except they may be shuffled differently if they contain a `Dataset.shuffle` operation. Because of this, it is also recommended that the datasets to be repeated indefinitely and you schedule a finite number of steps instead of relying on the `OutOfRangeError` from a dataset.\n", + "The `ClusterCoordinator.schedule` method assumes workers are equivalent and thus assumes the datasets on different workers are the same (except that they may be shuffled differently). Because of this, it is also recommended to repeat datasets, and schedule a finite number of steps instead of relying on receiving an `OutOfRangeError` from a dataset.\n", "\n", - "Another important note is that `tf.data` datasets don’t support implicit serialization and deserialization across task boundaries. So it is important to create the whole dataset inside the function passed to `ClusterCoordinator.create_per_worker_dataset`." 
+ "Another important note is that `tf.data` datasets don’t support implicit serialization and deserialization across task boundaries. So it is important to create the whole dataset inside the function passed to `ClusterCoordinator.create_per_worker_dataset`. The `create_per_worker_dataset` API can also directly take a `tf.data.Dataset` or `tf.distribute.DistributedDataset` as input." ] }, { @@ -883,7 +888,7 @@ "source": [ "## Evaluation\n", "\n", - "There is more than one way to define and run an evaluation loop in distributed training. Each has its own pros and cons as described below. The inline evaluation method is recommended if you don't have a preference." + "The two main approaches to performing evaluation with `tf.distribute.ParameterServerStrategy` training are inline evaluation and sidecar evaluation. Each has its own pros and cons as described below. The inline evaluation method is recommended if you don't have a preference. For users using `Model.fit`, `Model.evaluate` uses inline (distributed) evaluation under the hood." ] }, { @@ -894,12 +899,12 @@ "source": [ "### Inline evaluation\n", "\n", - "In this method, the coordinator alternates between training and evaluation and thus it is called it _inline evaluation_.\n", + "In this method, the coordinator alternates between training and evaluation, and thus it is called _inline evaluation_.\n", "\n", "There are several benefits of inline evaluation. For example:\n", "\n", "- It can support large evaluation models and evaluation datasets that a single task cannot hold.\n", - "- The evaluation results can be used to make decisions for training the next epoch.\n", + "- The evaluation results can be used to make decisions for training the next epoch, for example, whether to stop training early.\n", "\n", "There are two ways to implement inline evaluation: direct evaluation and distributed evaluation.\n", "\n", @@ -915,7 +920,7 @@ "outputs": [], "source": [ "eval_dataset = tf.data.Dataset.from_tensor_slices(\n", - " feature_and_label_gen(num_examples=16)).map(\n", + " feature_and_label_gen(num_examples=16)).map(\n", " lambda x: (\n", " {\"features\": feature_preprocess_stage(x[\"features\"])},\n", " label_preprocess_stage(x[\"label\"])\n", @@ -928,7 +933,7 @@ " actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64)\n", " eval_accuracy.update_state(labels, actual_pred)\n", "\n", - "print (\"Evaluation accuracy: %f\" % eval_accuracy.result())" + "print(\"Evaluation accuracy: %f\" % eval_accuracy.result())" ] }, { @@ -976,7 +981,7 @@ "for _ in range(eval_steps_per_epoch):\n", " coordinator.schedule(eval_step, args=(per_worker_eval_iterator,))\n", "coordinator.join()\n", - "print (\"Evaluation accuracy: %f\" % eval_accuracy.result())" + "print(\"Evaluation accuracy: %f\" % eval_accuracy.result())" ] }, { @@ -985,7 +990,23 @@ "id": "cKrQktZX5z7a" }, "source": [ - "Note: Currently, the `schedule` and `join` methods of `tf.distribute.experimental.coordinator.ClusterCoordinator` don’t support visitation guarantee or exactly-once semantics. In other words, there is no guarantee that all evaluation examples in a dataset will be evaluated exactly once; some may not be visited and some may be evaluated multiple times. Visitation guarantee on evaluation dataset is being worked on." + "#### Enabling exactly-once evaluation\n", + "\n", + "\n", + "The `schedule` and `join` methods of `tf.distribute.coordinator.ClusterCoordinator` don’t support visitation guarantees or exactly-once semantics by default. 
In other words, in the above example there is no guarantee that all evaluation examples in a dataset will be evaluated exactly once; some may not be visited and some may be evaluated multiple times.\n", + "\n", + "Exactly-once evaluation may be preferred to reduce the variance of evaluation across epochs, and improve model selection done via early stopping, hyperparameter tuning, or other methods. There are different ways to enable exactly-once evaluation:\n", + "\n", + "- With a `Model.fit/.evaluate` workflow, it can be enabled by adding an argument to `Model.compile`. Refer to docs for the `pss_evaluation_shards` argument.\n", + "- The `tf.data` service API can be used to provide exactly-once visitation for evaluation when using `ParameterServerStrategy` (refer to the _Dynamic Sharding_ section of the `tf.data.experimental.service` API documentation).\n", + "- [Sidecar evaluation](#sidecar_evaluation) provides exactly-once evaluation by default, since the evaluation happens on a single machine. However this can be much slower than performing evaluation distributed across many workers.\n", + "\n", + "The first option, using `Model.compile`, is the suggested solution for most users.\n", + "\n", + "Exactly-once evaluation has some limitations:\n", + "\n", + "- It is not supported to write a custom distributed evaluation loop with an exactly-once visitation guarantee. File a GitHub issue if you need support for this.\n", + "- It cannot automatically handle computation of metrics that use the `Layer.add_metric` API. These should be excluded from evaluation, or reworked into `Metric` objects." ] }, { @@ -994,9 +1015,69 @@ "id": "H40X-9Gs3i7_" }, "source": [ - "### Side-car evaluation\n", + "### Sidecar evaluation\n", + "\n", + "\n", + "Another method for defining and running an evaluation loop in `tf.distribute.ParameterServerStrategy` training is called _sidecar evaluation_, in which you create a dedicated evaluator task that repeatedly reads checkpoints and runs evaluation on the latest checkpoint (refer to [this guide](../../guide/checkpoint.ipynb) for more details on checkpointing). The coordinator and worker tasks do not spend any time on evaluation, so for a fixed number of iterations the overall training time should be shorter than using other evaluation methods. However, it requires an additional evaluator task and periodic checkpointing to trigger evaluation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HonyjnXK9-ys" + }, + "source": [ + "To write an evaluation loop for sidecar evaluation, you have two\n", + "options:\n", + "\n", + "1. Use the `tf.keras.utils.SidecarEvaluator` API.\n", + "2. Create a custom evaluation loop.\n", "\n", - "Another method is called _side-car evaluation_ where you create a dedicated evaluator task that repeatedly reads checkpoints and runs evaluation on a latest checkpoint. It allows your training program to finish early if you don't need to change your training loop based on evaluation results. However, it requires an additional evaluator task and periodic checkpointing to trigger evaluation. Following is a possible side-car evaluation loop:\n", + "Refer to the `tf.keras.utils.SidecarEvaluator` API documentation for more details on option 1." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U_c0EiwB88OG" + }, + "source": [ + "Sidecar evaluation is supported only with a single task. This means:\n", + "\n", + "* It is guaranteed that each example is evaluated once. 
In the event the\n", + " evaluator is preempted or restarted, it simply restarts the\n", + " evaluation loop from the latest checkpoint, and the partial evaluation\n", + " progress made before the restart is discarded.\n", + "\n", + "* However, running evaluation on a single task implies that a full evaluation\n", + " can possibly take a long time.\n", + "\n", + "* If the size of the model is too large to fit into an evaluator's memory,\n", + " single sidecar evaluation is not applicable." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VNJoWVc797B1" + }, + "source": [ + "Another caveat is that the `tf.keras.utils.SidecarEvaluator` implementation, and the custom\n", + "evaluation loop below, may skip some checkpoints because it always picks up the\n", + "latest checkpoint available, and during an evaluation epoch, multiple\n", + "checkpoints can be produced from the training cluster. You can write a custom\n", + "evaluation loop that evaluates every checkpoint, but it is not covered in this\n", + "tutorial. On the other hand, it may sit idle if checkpoints are produced less\n", + "frequently than how long it takes to run evaluation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G5jopxBd85Ji" + }, + "source": [ + "A custom evaluation loop provides more control over the details, such as choosing which checkpoint to evaluate, or providing any additional logic to run along with evaluation. The following is a possible custom sidecar evaluation loop:\n", "\n", "```python\n", "checkpoint_dir = ...\n", @@ -1016,7 +1097,7 @@ " eval_model.evaluate(eval_data)\n", "\n", " # Evaluation finishes when it has evaluated the last epoch.\n", - " if latest_checkpoint.endswith('-{}'.format(train_epoches)):\n", + " if latest_checkpoint.endswith('-{}'.format(train_epochs)):\n", " break\n", "```" ] @@ -1034,9 +1115,9 @@ "\n", "In a real production environment, you will run all tasks in different processes on different machines. The simplest way to configure cluster information on each task is to set `\"TF_CONFIG\"` environment variables and use a `tf.distribute.cluster_resolver.TFConfigClusterResolver` to parse `\"TF_CONFIG\"`.\n", "\n", - "For a general description about `\"TF_CONFIG\"` environment variables, refer to the [Distributed training](https://www.tensorflow.org/guide/distributed_training#setting_up_tf_config_environment_variable) guide.\n", + "For a general description of `\"TF_CONFIG\"` environment variables, refer to \"Setting up the `TF_CONFIG` environment variable\" in the [Distributed training](../../guide/distributed_training.ipynb) guide.\n", "\n", - "If you start your training tasks using Kubernetes or other configuration templates, it is very likely that these templates have already set `“TF_CONFIG\"` for you." + "If you start your training tasks using Kubernetes or other configuration templates, likely, these templates have already set `“TF_CONFIG\"` for you." ] }, { @@ -1047,7 +1128,7 @@ "source": [ "### Set the `\"TF_CONFIG\"` environment variable\n", "\n", - "Suppose you have 3 workers and 2 parameter servers, the `\"TF_CONFIG\"` of worker 1 can be:\n", + "Suppose you have 3 workers and 2 parameter servers. 
Then the `\"TF_CONFIG\"` of worker 1 can be:\n", "\n", "```python\n", "os.environ[\"TF_CONFIG\"] = json.dumps({\n", @@ -1089,12 +1170,12 @@ "if cluster_resolver.task_type in (\"worker\", \"ps\"):\n", " # Start a TensorFlow server and wait.\n", "elif cluster_resolver.task_type == \"evaluator\":\n", - " # Run side-car evaluation\n", + " # Run sidecar evaluation\n", "else:\n", " # Run the coordinator.\n", "```\n", "\n", - "The following code starts a TensorFlow server and waits:\n", + "The following code starts a TensorFlow server and waits, useful for the `\"worker\"` and `\"ps\"` roles:\n", "\n", "```python\n", "# Set the environment variable to allow reporting worker and ps failure to the\n", @@ -1128,7 +1209,7 @@ "source": [ "### Worker failure\n", "\n", - "`tf.distribute.experimental.coordinator.ClusterCoordinator` or `Model.fit` provide built-in fault tolerance for worker failure. Upon worker recovery, the previously provided dataset function (either to `ClusterCoordinator.create_per_worker_dataset` for a custom training loop, or `tf.keras.utils.experimental.DatasetCreator` for `Model.fit`) will be invoked on the workers to re-create the datasets." + "Both the `tf.distribute.coordinator.ClusterCoordinator` custom training loop and `Model.fit` approaches provide built-in fault tolerance for worker failure. Upon worker recovery, the `ClusterCoordinator` invokes dataset re-creation on the workers." ] }, { @@ -1172,7 +1253,7 @@ "global_steps = int(optimizer.iterations.numpy())\n", "starting_epoch = global_steps // steps_per_epoch\n", "\n", - "for _ in range(starting_epoch, num_epoches):\n", + "for _ in range(starting_epoch, num_epochs):\n", " for _ in range(steps_per_epoch):\n", " coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", " coordinator.join()\n", @@ -1212,17 +1293,21 @@ "source": [ "## Performance improvement\n", "\n", - "There are several possible reasons if you see performance issues when you train with `ParameterServerStrategy` and `ClusterResolver`.\n", + "There are several possible reasons you may experience performance issues when you train with `tf.distribute.ParameterServerStrategy` and `tf.distribute.coordinator.ClusterCoordinator`.\n", "\n", - "One common reason is parameter servers have unbalanced load and some heavily-loaded parameter servers have reached capacity. There can also be multiple root causes. Some simple methods to mitigate this issue are to:\n", + "One common reason is that the parameter servers have unbalanced load and some heavily-loaded parameter servers have reached capacity. There can also be multiple root causes. Some simple methods to mitigate this issue are to:\n", "\n", "1. Shard your large model variables via specifying a `variable_partitioner` when constructing a `ParameterServerStrategy`.\n", - "2. Avoid creating a hotspot variable that is required by all parameter servers in a single step if possible. For example, use a constant learning rate or subclass `tf.keras.optimizers.schedules.LearningRateSchedule` in optimizers since the default behavior is that the learning rate will become a variable placed on a particular parameter server and requested by all other parameter servers in each step.\n", + "2. Avoid creating a hotspot variable that is required by all parameter servers in a single step, by both:\n", + "\n", + " 1) Using a constant learning rate or subclass `tf.keras.optimizers.schedules.LearningRateSchedule` in optimizers. 
This is because the default behavior is that the learning rate will become a variable placed on a particular parameter server, and requested by all other parameter servers in each step); and\n", + "\n", + " 2) Using a `tf.keras.optimizers.legacy.Optimizer` (the standard `tf.keras.optimizers.Optimizer`s could still lead to hotspot variables).\n", "3. Shuffle your large vocabularies before passing them to Keras preprocessing layers.\n", "\n", - "Another possible reason for performance issues is the coordinator. Your first implementation of `schedule`/`join` is Python-based and thus may have threading overhead. Also the latency between the coordinator and the workers can be large. If this is the case,\n", + "Another possible reason for performance issues is the coordinator. The implementation of `schedule`/`join` is Python-based and thus may have threading overhead. Also, the latency between the coordinator and the workers can be large. If this is the case:\n", "\n", - "- For `Model.fit`, you can set `steps_per_execution` argument provided at `Model.compile` to a value larger than 1.\n", + "- For `Model.fit`, you can set the `steps_per_execution` argument provided at `Model.compile` to a value larger than 1.\n", "\n", "- For a custom training loop, you can pack multiple steps into a single `tf.function`:\n", "\n", @@ -1241,7 +1326,7 @@ "\n", "As the library is optimized further, hopefully most users won't have to manually pack steps in the future.\n", "\n", - "In addition, a small trick for performance improvement is to schedule functions without a return value as explained in the handling task failure section above." + "In addition, a small trick for performance improvement is to schedule functions without a return value as explained in the [handling task failure section](#handling_task_failure) above." ] }, { @@ -1261,22 +1346,35 @@ "- `os.environment[\"grpc_fail_fast\"]=\"use_caller\"` is needed on every task including the coordinator, to make fault tolerance work properly.\n", "- Synchronous parameter server training is not supported.\n", "- It is usually necessary to pack multiple steps into a single function to achieve optimal performance.\n", - "- It is not supported to load a saved_model via `tf.saved_model.load` containing sharded variables. Note loading such a saved_model using TensorFlow Serving is expected to work.\n", - "- It is not supported to load a checkpoint containing sharded optimizer slot variables into a different number of shards.\n", + "- It is not supported to load a saved_model via `tf.saved_model.load` containing sharded variables. Note loading such a saved_model using TensorFlow Serving is expected to work (refer to the [serving tutorial](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple) for details).\n", "- It is not supported to recover from parameter server failure without restarting the coordinator task.\n", - "- Usage of `tf.lookup.StaticHashTable` (which is commonly employed by some `tf.keras.layers.experimental.preprocessing` layers, such as `IntegerLookup`, `StringLookup`, and `TextVectorization`) results in resources placed on the coordinator at this time with parameter server training. This has performance implications for lookup RPCs from workers to the coordinator. 
This is a current high priority to address.\n", - "\n", + "- Creation of `tf.lookup.StaticHashTable`, commonly employed by some Keras preprocessing layers, such as `tf.keras.layers.IntegerLookup`, `tf.keras.layers.StringLookup`, and `tf.keras.layers.TextVectorization`, should be placed under `Strategy.scope`. Otherwise, resources will be placed on the coordinator, and lookup RPCs from workers to the coordinator incur performance implications.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2MKBF0RPSvzB" + }, + "source": [ "### `Model.fit` specifics\n", "\n", "- `steps_per_epoch` argument is required in `Model.fit`. You can select a value that provides appropriate intervals in an epoch.\n", "- `ParameterServerStrategy` does not have support for custom callbacks that have batch-level calls for performance reasons. You should convert those calls into epoch-level calls with suitably picked `steps_per_epoch`, so that they are called every `steps_per_epoch` number of steps. Built-in callbacks are not affected: their batch-level calls have been modified to be performant. Supporting batch-level calls for `ParameterServerStrategy` is being planned.\n", - "- For the same reason, unlike other strategies, progress bar and metrics are logged only at epoch boundaries.\n", - "- `run_eagerly` is not supported.\n", - "\n", + "- For the same reason, unlike other strategies, progress bars and metrics are logged only at epoch boundaries.\n", + "- `run_eagerly` is not supported.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvY-mg35Sx5L" + }, + "source": [ "### Custom training loop specifics\n", "\n", - "- `ClusterCoordinator.schedule` doesn't support visitation guarantees for a dataset.\n", - "- When `ClusterCoordinator.create_per_worker_dataset` is used, the whole dataset must be created inside the function passed to it.\n", + "- `ClusterCoordinator.schedule` doesn't support visitation guarantees for a dataset in general, although a visitation guarantee for evaluation is possible through `Model.fit/.evaluate`. See [Enabling exactly-once evaluation](#exactly_once_evaluation).\n", + "- When `ClusterCoordinator.create_per_worker_dataset` is used with a callable as input, the whole dataset must be created inside the function passed to it.\n", "- `tf.data.Options` is ignored in a dataset created by `ClusterCoordinator.create_per_worker_dataset`." ] } @@ -1284,9 +1382,8 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], "name": "parameter_server_training.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/save_and_load.ipynb b/site/en/tutorials/distribute/save_and_load.ipynb index 7317b277c45..c53a9b8bf0b 100644 --- a/site/en/tutorials/distribute/save_and_load.ipynb +++ b/site/en/tutorials/distribute/save_and_load.ipynb @@ -71,7 +71,12 @@ "source": [ "## Overview\n", "\n", - "It's common to save and load a model during training. There are two sets of APIs for saving and loading a keras model: a high-level API, and a low-level API. This tutorial demonstrates how you can use the SavedModel APIs when using `tf.distribute.Strategy`. To learn about SavedModel and serialization in general, please read the [saved model guide](../../guide/saved_model.ipynb), and the [Keras model serialization guide](../../guide/keras/save_and_serialize.ipynb). 
Let's start with a simple example: " + "This tutorial demonstrates how you can save and load models in a SavedModel format with `tf.distribute.Strategy` during or after training. There are two kinds of APIs for saving and loading a Keras model: high-level (`tf.keras.Model.save` and `tf.keras.models.load_model`) and low-level (`tf.saved_model.save` and `tf.saved_model.load`).\n", + "\n", + "To learn about SavedModel and serialization in general, please read the [saved model guide](../../guide/saved_model.ipynb), and the [Keras model serialization guide](https://www.tensorflow.org/guide/keras/save_and_serialize). Let's start with a simple example.\n", + "\n", + "Caution: TensorFlow models are code and it is important to be careful with untrusted code. Learn more in [Using TensorFlow securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).\n", + "\n" ] }, { @@ -102,7 +107,7 @@ "id": "qqapWj98ptNV" }, "source": [ - "Prepare the data and model using `tf.distribute.Strategy`:" + "Load and prepare the data with TensorFlow Datasets and `tf.data`, and create the model using `tf.distribute.MirroredStrategy`:" ] }, { @@ -116,7 +121,7 @@ "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", "def get_data():\n", - " datasets, ds_info = tfds.load(name='mnist', with_info=True, as_supervised=True)\n", + " datasets = tfds.load(name='mnist', as_supervised=True)\n", " mnist_train, mnist_test = datasets['train'], datasets['test']\n", "\n", " BUFFER_SIZE = 10000\n", @@ -157,7 +162,7 @@ "id": "qmU4Y3feS9Na" }, "source": [ - "Train the model: " + "Train the model with `tf.keras.Model.fit`: " ] }, { @@ -181,11 +186,11 @@ "source": [ "## Save and load the model\n", "\n", - "Now that you have a simple model to work with, let's take a look at the saving/loading APIs. \n", - "There are two sets of APIs available:\n", + "Now that you have a simple model to work with, let's explore the saving/loading APIs. \n", + "There are two kinds of APIs available:\n", "\n", - "* High level keras `model.save` and `tf.keras.models.load_model`\n", - "* Low level `tf.saved_model.save` and `tf.saved_model.load`\n" + "* High-level (Keras): `Model.save` and `tf.keras.models.load_model` (`.keras` zip archive format)\n", + "* Low-level: `tf.saved_model.save` and `tf.saved_model.load` (TF SavedModel format)\n" ] }, { @@ -194,7 +199,7 @@ "id": "FX_IF2F1tvFs" }, "source": [ - "### The Keras APIs" + "### The Keras API" ] }, { @@ -203,7 +208,7 @@ "id": "O8xfceg4Z3H_" }, "source": [ - "Here is an example of saving and loading a model with the Keras APIs:" + "Here is an example of saving and loading a model with the Keras API:" ] }, { @@ -214,7 +219,7 @@ }, "outputs": [], "source": [ - "keras_model_path = \"/tmp/keras_save\"\n", + "keras_model_path = '/tmp/keras_save.keras'\n", "model.save(keras_model_path)" ] }, @@ -245,9 +250,9 @@ "id": "gYAnskzorda-" }, "source": [ - "After restoring the model, you can continue training on it, even without needing to call `compile()` again, since it is already compiled before saving. The model is saved in the TensorFlow's standard `SavedModel` proto format. For more information, please refer to [the guide to `saved_model` format](../../guide/saved_model.ipynb).\n", + "After restoring the model, you can continue training on it, even without needing to call `Model.compile` again, since it was already compiled before saving. The model is saved a Keras zip archive format, marked by the `.keras` extension. 
For more information, please refer to [the guide on Keras saving](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", "\n", - "Now to load the model and train it using a `tf.distribute.Strategy`:" + "Now, restore the model and train it using a `tf.distribute.Strategy`:" ] }, { @@ -258,7 +263,7 @@ }, "outputs": [], "source": [ - "another_strategy = tf.distribute.OneDeviceStrategy(\"/cpu:0\")\n", + "another_strategy = tf.distribute.OneDeviceStrategy('/cpu:0')\n", "with another_strategy.scope():\n", " restored_keras_model_ds = tf.keras.models.load_model(keras_model_path)\n", " restored_keras_model_ds.fit(train_dataset, epochs=2)" @@ -270,7 +275,7 @@ "id": "PdiiPmL5tQk5" }, "source": [ - "As you can see, loading works as expected with `tf.distribute.Strategy`. The strategy used here does not have to be the same strategy used before saving. " + "As the `Model.fit` output shows, loading works as expected with `tf.distribute.Strategy`. The strategy used here does not have to be the same strategy used before saving. " ] }, { @@ -279,7 +284,7 @@ "id": "3CrXIbmFt0f6" }, "source": [ - "### The `tf.saved_model` APIs" + "### The `tf.saved_model` API" ] }, { @@ -288,7 +293,7 @@ "id": "HtGzPp6et4Em" }, "source": [ - "Now let's take a look at the lower level APIs. Saving the model is similar to the keras API:" + "Saving the model with lower-level API is similar to the Keras API:" ] }, { @@ -300,7 +305,7 @@ "outputs": [], "source": [ "model = get_model() # get a fresh model\n", - "saved_model_path = \"/tmp/tf_save\"\n", + "saved_model_path = '/tmp/tf_save'\n", "tf.saved_model.save(model, saved_model_path)" ] }, @@ -310,7 +315,7 @@ "id": "q1QNRYcwuRll" }, "source": [ - "Loading can be done with `tf.saved_model.load()`. However, since it is an API that is on the lower level (and hence has a wider range of use cases), it does not return a Keras model. Instead, it returns an object that contain functions that can be used to do inference. For example:" + "Loading can be done with `tf.saved_model.load`. However, since it is a lower-level API (and hence has a wider range of use cases), it does not return a Keras model. Instead, it returns an object that contain functions that can be used to do inference. For example:" ] }, { @@ -321,7 +326,7 @@ }, "outputs": [], "source": [ - "DEFAULT_FUNCTION_KEY = \"serving_default\"\n", + "DEFAULT_FUNCTION_KEY = 'serving_default'\n", "loaded = tf.saved_model.load(saved_model_path)\n", "inference_func = loaded.signatures[DEFAULT_FUNCTION_KEY]" ] @@ -332,7 +337,7 @@ "id": "x65l7AaHUZCA" }, "source": [ - "The loaded object may contain multiple functions, each associated with a key. The `\"serving_default\"` is the default key for the inference function with a saved Keras model. To do an inference with this function: " + "The loaded object may contain multiple functions, each associated with a key. The `\"serving_default\"` key is the default key for the inference function with a saved Keras model. To do inference with this function: " ] }, { @@ -375,7 +380,9 @@ "\n", " # Calling the function in a distributed manner\n", " for batch in dist_predict_dataset:\n", - " another_strategy.run(inference_func,args=(batch,))" + " result = another_strategy.run(inference_func, args=(batch,))\n", + " print(result)\n", + " break" ] }, { @@ -384,7 +391,7 @@ "id": "hWGSukoyw3fF" }, "source": [ - "Calling the restored function is just a forward pass on the saved model (predict). What if yout want to continue training the loaded function? Or embed the loaded function into a bigger model? 
A common practice is to wrap this loaded object to a Keras layer to achieve this. Luckily, [TF Hub](https://www.tensorflow.org/hub) has [hub.KerasLayer](https://github.com/tensorflow/hub/blob/master/tensorflow_hub/keras_layer.py) for this purpose, shown here:" + "Calling the restored function is just a forward pass on the saved model (`tf.keras.Model.predict`). What if you want to continue training the loaded function? Or what if you need to embed the loaded function into a bigger model? A common practice is to wrap this loaded object into a Keras layer to achieve this. Luckily, [TF Hub](https://www.tensorflow.org/hub) has [`hub.KerasLayer`](https://github.com/tensorflow/hub/blob/master/tensorflow_hub/keras_layer.py) for this purpose, shown here:" ] }, { @@ -421,7 +428,7 @@ "id": "Oe1z_OtSJlu2" }, "source": [ - "As you can see, `hub.KerasLayer` wraps the result loaded back from `tf.saved_model.load()` into a Keras layer that can be used to build another model. This is very useful for transfer learning. " + "In the above example, Tensorflow Hub's `hub.KerasLayer` wraps the result loaded back from `tf.saved_model.load` into a Keras layer that is used to build another model. This is very useful for transfer learning. " ] }, { @@ -439,11 +446,11 @@ "id": "GC6GQ9HDLxD6" }, "source": [ - "For saving, if you are working with a keras model, it is almost always recommended to use the Keras's `model.save()` API. If what you are saving is not a Keras model, then the lower level API is your only choice. \n", + "For saving, if you are working with a Keras model, use the Keras `Model.save` API unless you need the additional control allowed by the low-level API. If what you are saving is not a Keras model, then the lower-level API, `tf.saved_model.save`, is your only choice. \n", "\n", - "For loading, which API you use depends on what you want to get from the loading API. If you cannot (or do not want to) get a Keras model, then use `tf.saved_model.load()`. Otherwise, use `tf.keras.models.load_model()`. Note that you can get a Keras model back only if you saved a Keras model. \n", + "For loading, your API choice depends on what you want to get from the model loading API. If you cannot (or do not want to) get a Keras model, then use `tf.saved_model.load`. Otherwise, use `tf.keras.models.load_model`. Note that you can get a Keras model back only if you saved a Keras model. \n", "\n", - "It is possible to mix and match the APIs. You can save a Keras model with `model.save`, and load a non-Keras model with the low-level API, `tf.saved_model.load`. " + "It is possible to mix and match the APIs. You can save a Keras model with `Model.save`, and load a non-Keras model with the low-level API, `tf.saved_model.load`. 
" ] }, { @@ -456,13 +463,13 @@ "source": [ "model = get_model()\n", "\n", - "# Saving the model using Keras's save() API\n", - "model.save(keras_model_path) \n", + "# Saving the model using Keras `Model.save`\n", + "model.save(saved_model_path)\n", "\n", "another_strategy = tf.distribute.MirroredStrategy()\n", - "# Loading the model using lower level API\n", + "# Loading the model using the lower-level API\n", "with another_strategy.scope():\n", - " loaded = tf.saved_model.load(keras_model_path)" + " loaded = tf.saved_model.load(saved_model_path)" ] }, { @@ -471,7 +478,7 @@ "id": "0Z7lSj8nZiW5" }, "source": [ - "### Saving/Loading from local device" + "### Saving/Loading from a local device" ] }, { @@ -480,7 +487,7 @@ "id": "NVAjWcosZodw" }, "source": [ - "When saving and loading from a local io device while running remotely, for example using a cloud TPU, the option `experimental_io_device` must be used to set the io device to localhost." + "When saving and loading from a local I/O device while training on remote devices—for example, when using a Cloud TPU—you must use the option `experimental_io_device` in `tf.saved_model.SaveOptions` and `tf.saved_model.LoadOptions` to set the I/O device to `localhost`. For example:" ] }, { @@ -494,7 +501,7 @@ "model = get_model()\n", "\n", "# Saving the model to a path on localhost.\n", - "saved_model_path = \"/tmp/tf_save\"\n", + "saved_model_path = '/tmp/tf_save'\n", "save_options = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')\n", "model.save(saved_model_path, options=save_options)\n", "\n", @@ -517,14 +524,10 @@ { "cell_type": "markdown", "metadata": { - "id": "Tzog2ti7YYgy" + "id": "2cCSZrD7VCxe" }, "source": [ - "A special case is when you have a Keras model that does not have well-defined inputs. For example, a Sequential model can be created without any input shapes (`Sequential([Dense(3), ...]`). Subclassed models also do not have well-defined inputs after initialization. In this case, you should stick with the lower level APIs on both saving and loading, otherwise you will get an error. \n", - "\n", - "To check if your model has well-defined inputs, just check if `model.inputs` is `None`. If it is not `None`, you are all good. Input shapes are automatically defined when the model is used in `.fit`, `.evaluate`, `.predict`, or when calling the model (`model(inputs)`). \n", - "\n", - "Here is an example:" + "One special case is when you create Keras models in certain ways, and then save them before training. For example:" ] }, { @@ -536,6 +539,7 @@ "outputs": [], "source": [ "class SubclassedModel(tf.keras.Model):\n", + " \"\"\"Example model defined by subclassing `tf.keras.Model`.\"\"\"\n", "\n", " output_name = 'output_layer'\n", "\n", @@ -548,8 +552,89 @@ " return self._dense_layer(inputs)\n", "\n", "my_model = SubclassedModel()\n", - "# my_model.save(keras_model_path) # ERROR! \n", - "tf.saved_model.save(my_model, saved_model_path)" + "try:\n", + " my_model.save(saved_model_path)\n", + "except ValueError as e:\n", + " print(f'{type(e).__name__}: ', *e.args)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D4qMyXFDSPDO" + }, + "source": [ + "A SavedModel saves the `tf.types.experimental.ConcreteFunction` objects generated when you trace a `tf.function` (check _When is a Function tracing?_ in the [Introduction to graphs and tf.function](../../guide/intro_to_graphs.ipynb) guide to learn more). 
If you get a `ValueError` like this it's because `Model.save` was not able to find or create a traced `ConcreteFunction`.\n", + "\n", + "**Caution:** You shouldn't save a model without at least one `ConcreteFunction`, since the low-level API will otherwise generate a SavedModel with no `ConcreteFunction` signatures ([learn more](../../guide/saved_model.ipynb) about the SavedModel format). For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "064SE47mYDj8" + }, + "outputs": [], + "source": [ + "tf.saved_model.save(my_model, saved_model_path)\n", + "x = tf.saved_model.load(saved_model_path)\n", + "x.signatures" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LRTxlASJX-cY" + }, + "source": [ + "\n", + "Usually the model's forward pass—the `call` method—will be traced automatically when the model is called for the first time, often via the Keras `Model.fit` method. A `ConcreteFunction` can also be generated by the Keras [Sequential](https://www.tensorflow.org/guide/keras/sequential_model) and [Functional](https://www.tensorflow.org/guide/keras/functional) APIs, if you set the input shape, for example, by making the first layer either a `tf.keras.layers.InputLayer` or another layer type, and passing it the `input_shape` keyword argument. \n", + "\n", + "To verify if your model has any traced `ConcreteFunction`s, check if `Model.save_spec` is `None`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xAXise4eR0YJ" + }, + "outputs": [], + "source": [ + "print(my_model.save_spec() is None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G2G_FQrWJAO3" + }, + "source": [ + "Let's use `tf.keras.Model.fit` to train the model, and notice that the `save_spec` gets defined and model saving will work:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cv5LTi0zDkKS" + }, + "outputs": [], + "source": [ + "BATCH_SIZE_PER_REPLICA = 4\n", + "BATCH_SIZE = BATCH_SIZE_PER_REPLICA * mirrored_strategy.num_replicas_in_sync\n", + "\n", + "dataset_size = 100\n", + "dataset = tf.data.Dataset.from_tensors(\n", + " (tf.range(5, dtype=tf.float32), tf.range(5, dtype=tf.float32))\n", + " ).repeat(dataset_size).batch(BATCH_SIZE)\n", + "\n", + "my_model.compile(optimizer='adam', loss='mean_squared_error')\n", + "my_model.fit(dataset, epochs=2)\n", + "\n", + "print(my_model.save_spec() is None)\n", + "my_model.save(saved_model_path)" ] } ], diff --git a/site/en/tutorials/estimator/boosted_trees.ipynb b/site/en/tutorials/estimator/boosted_trees.ipynb deleted file mode 100644 index 4c1bb1890c0..00000000000 --- a/site/en/tutorials/estimator/boosted_trees.ipynb +++ /dev/null @@ -1,612 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7765UFHoyGx6" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "KVtTDrUNyL7x" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xPYxZMrWyA0N" - }, - "source": [ - "# Boosted trees using Estimators" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "p_vOREjRx-Y0" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
    \n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6gWdn5lrlkhR" - }, - "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees] (https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qNW3c_rop5J8" - }, - "source": [ - "**Note**: Modern Keras based implementations of many state of the art decision forest algorithms are available in [TensorFlow Decision Forests](https://tensorflow.org/decision_forests)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "This tutorial is an end-to-end walkthrough of training a Gradient Boosting model using decision trees with the `tf.estimator` API. Boosted Trees models are among the most popular and effective machine learning approaches for both regression and classification. It is an ensemble technique that combines the predictions from several (think 10s, 100s or even 1000s) tree models.\n", - "\n", - "Boosted Trees models are popular with many machine learning practitioners as they can achieve impressive performance with minimal hyperparameter tuning." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from IPython.display import clear_output\n", - "from matplotlib import pyplot as plt\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NFtnFm1T0kMf" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "tf.random.set_seed(123)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "The dataset consists of a training set and an evaluation set:\n", - "\n", - "* `dftrain` and `y_train` are the *training set*—the data the model uses to learn.\n", - "* The model is tested against the *eval set*, `dfeval`, and `y_eval`.\n", - "\n", - "For training you will use the following features:\n", - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Feature Name | Description
sex | Gender of passenger
age | Age of passenger
n_siblings_spouses | Number of siblings and partners aboard
parch | Number of parents and children aboard
fare | Fare passenger paid.
class | Passenger's class on ship
deck | Which deck passenger was on
embark_town | Which town passenger embarked from
alone | If passenger was alone
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AoPiWsJALr-k" - }, - "source": [ - "## Explore the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "slcat1yzmzw5" - }, - "source": [ - "Let's first preview some of the data and create summary statistics on the training set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "15PLelXBlxEW" - }, - "outputs": [], - "source": [ - "dftrain.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "j2hiM4ETmqP0" - }, - "outputs": [], - "source": [ - "dftrain.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-IR0e8V-LyJ4" - }, - "source": [ - "There are 627 and 264 examples in the training and evaluation sets, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_1NwYqGwDjFf" - }, - "outputs": [], - "source": [ - "dftrain.shape[0], dfeval.shape[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "28UFJ4KSMK3V" - }, - "source": [ - "The majority of passengers are in their 20's and 30's." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CaVDmZtuDfux" - }, - "outputs": [], - "source": [ - "dftrain.age.hist(bins=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1pifWiCoMbR5" - }, - "source": [ - "There are approximately twice as male passengers as female passengers aboard." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-WazAq30MO5J" - }, - "outputs": [], - "source": [ - "dftrain.sex.value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7_XkxrpmmVU_" - }, - "source": [ - "The majority of passengers were in the \"third\" class." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zZ3PvVy4l4gI" - }, - "outputs": [], - "source": [ - "dftrain['class'].value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HM5SlwlxmZMT" - }, - "source": [ - "Most passengers embarked from Southampton." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RVTSrdr4mZaC" - }, - "outputs": [], - "source": [ - "dftrain['embark_town'].value_counts().plot(kind='barh')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aTn1niLPob3x" - }, - "source": [ - "Females have a much higher chance of surviving vs. males. This will clearly be a predictive feature for the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eh3KW5oYkaNS" - }, - "outputs": [], - "source": [ - "pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns and input functions\n", - "The Gradient Boosting estimator can utilize both numeric and categorical features. Feature columns work with all TensorFlow estimators and their purpose is to define the features used for modeling. Additionally they provide some feature engineering capabilities like one-hot-encoding, normalization, and bucketization. 
In this tutorial, the fields in `CATEGORICAL_COLUMNS` are transformed from categorical columns to one-hot-encoded columns ([indicator column](https://www.tensorflow.org/api_docs/python/tf/feature_column/indicator_column)):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return tf.feature_column.indicator_column(\n", - " tf.feature_column.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(tf.feature_column.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "74GNtFpStSAz" - }, - "source": [ - "You can view the transformation that a feature column produces. For example, here is the output when using the `indicator_column` on a single example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eaq79D9FtmF8" - }, - "outputs": [], - "source": [ - "example = dict(dftrain.head(1))\n", - "class_fc = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_vocabulary_list('class', ('First', 'Second', 'Third')))\n", - "print('Feature value: \"{}\"'.format(example['class'].iloc[0]))\n", - "print('One-hot encoded: ', tf.keras.layers.DenseFeatures([class_fc])(example).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YbCUn3nCusC3" - }, - "source": [ - "Additionally, you can view all of the feature column transformations together:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "omIYcsVws3g0" - }, - "outputs": [], - "source": [ - "tf.keras.layers.DenseFeatures(feature_columns)(example).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Next you need to create the input functions. These will specify how data will be read into our model for both training and inference. You will use the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas. This is suitable for smaller, in-memory datasets. For larger datasets, the tf.data API supports a variety of file formats (including [csv](https://www.tensorflow.org/api_docs/python/tf/data/experimental/make_csv_dataset)) so that you can process datasets that do not fit in memory." 
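The notebook itself only uses the in-memory `from_tensor_slices` path shown in the next cell. As a hedged illustration of the larger-than-memory alternative mentioned above, `tf.data.experimental.make_csv_dataset` can stream the same kind of CSV from disk; the local file name here is an assumption, not part of the original notebook.

```python
import tensorflow as tf

# Stream the Titanic training data from a CSV file instead of holding a
# DataFrame in memory. `label_name` picks the column returned as the label.
csv_dataset = tf.data.experimental.make_csv_dataset(
    'titanic_train.csv',   # assumed local copy of the training CSV
    batch_size=32,
    label_name='survived',
    num_epochs=1,
    shuffle=True)

for features, labels in csv_dataset.take(1):
  print(labels.numpy()[:5])
  print({name: values.numpy()[:2] for name, values in features.items()})
```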
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = dataset.repeat(n_epochs)\n", - " # In memory training doesn't use batching.\n", - " dataset = dataset.batch(NUM_EXAMPLES)\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "## Train and evaluate the model\n", - "\n", - "Below you will do the following steps:\n", - "\n", - "1. Initialize the model, specifying the features and hyperparameters.\n", - "2. Feed the training data to the model using the `train_input_fn` and train the model using the `train` function.\n", - "3. You will assess model performance using the evaluation set—in this example, the `dfeval` DataFrame. You will verify that the predictions match the labels from the `y_eval` array.\n", - "\n", - "Before training a Boosted Trees model, let's first train a linear classifier (logistic regression model). It is best practice to start with a simpler model to establish a benchmark." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JPOGpmmq3rJr" - }, - "outputs": [], - "source": [ - "linear_est = tf.estimator.LinearClassifier(feature_columns)\n", - "\n", - "# Train model.\n", - "linear_est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Evaluation.\n", - "result = linear_est.evaluate(eval_input_fn)\n", - "clear_output()\n", - "print(pd.Series(result))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BarkNXwA3rJu" - }, - "source": [ - "Next let's train a Boosted Trees model. For boosted trees, regression (`BoostedTreesRegressor`) and classification (`BoostedTreesClassifier`) are supported. Since the goal is to predict a class - survive or not survive, you will use the `BoostedTreesClassifier`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "# Since data fits into memory, use entire dataset per layer. It will be faster.\n", - "# Above one batch is defined as the entire dataset.\n", - "n_batches = 1\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns,\n", - " n_batches_per_layer=n_batches)\n", - "\n", - "# The model will stop training once the specified number of trees is built, not\n", - "# based on the number of steps.\n", - "est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Eval.\n", - "result = est.evaluate(eval_input_fn)\n", - "clear_output()\n", - "print(pd.Series(result))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hEflwznXvuMP" - }, - "source": [ - "Now you can use the train model to make predictions on a passenger from the evaluation set. TensorFlow models are optimized to make predictions on a batch, or collection, of examples at once. 
Earlier, the `eval_input_fn` is defined using the entire evaluation set." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6zmIjTr73rJ4" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.predict(eval_input_fn))\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "\n", - "probs.plot(kind='hist', bins=20, title='predicted probabilities')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mBUaNN1BzJHG" - }, - "source": [ - "Finally you can also look at the receiver operating characteristic (ROC) of the results, which will give us a better idea of the tradeoff between the true positive rate and false positive rate." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NzxghvVz3rJ6" - }, - "outputs": [], - "source": [ - "from sklearn.metrics import roc_curve\n", - "\n", - "fpr, tpr, _ = roc_curve(y_eval, probs)\n", - "plt.plot(fpr, tpr)\n", - "plt.title('ROC curve')\n", - "plt.xlabel('false positive rate')\n", - "plt.ylabel('true positive rate')\n", - "plt.xlim(0,)\n", - "plt.ylim(0,)\n", - "plt.show()" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "boosted_trees.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/tutorials/estimator/boosted_trees_model_understanding.ipynb b/site/en/tutorials/estimator/boosted_trees_model_understanding.ipynb deleted file mode 100644 index c437574a13a..00000000000 --- a/site/en/tutorials/estimator/boosted_trees_model_understanding.ipynb +++ /dev/null @@ -1,1027 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "7765UFHoyGx6" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "KVtTDrUNyL7x" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r0_fqL3ayLHX" - }, - "source": [ - "# Gradient Boosted Trees: Model understanding" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PS6_yKSoyLAl" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
    \n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
    " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pV4mnvs7l40o" - }, - "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f4L1ffaFp2gT" - }, - "source": [ - "**Note**: Modern Keras based implementations of many state of the art decision forest algorithms are available in [TensorFlow Decision Forests](https://tensorflow.org/decision_forests)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dW3r7qVxzqN5" - }, - "source": [ - "For an end-to-end walkthrough of training a Gradient Boosting model check out the [boosted trees tutorial](./boosted_trees). In this tutorial you will:\n", - "\n", - "* Learn how to interpret a Boosted Trees model both *locally* and *globally*\n", - "* Gain intution for how a Boosted Trees model fits a dataset\n", - "\n", - "## How to interpret Boosted Trees models both locally and globally\n", - "\n", - "Local interpretability refers to an understanding of a model’s predictions at the individual example level, while global interpretability refers to an understanding of the model as a whole. Such techniques can help machine learning (ML) practitioners detect bias and bugs during the model development stage.\n", - "\n", - "For local interpretability, you will learn how to create and visualize per-instance contributions. To distinguish this from feature importances, we refer to these values as directional feature contributions (DFCs).\n", - "\n", - "For global interpretability you will retrieve and visualize gain-based feature importances, [permutation feature importances](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf) and also show aggregated DFCs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eylrTPAN3rJV" - }, - "source": [ - "## Load the titanic dataset\n", - "You will be using the titanic dataset, where the (rather morbid) goal is to predict passenger survival, given characteristics such as gender, age, class, etc." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "132V3PZ8V8VA" - }, - "outputs": [], - "source": [ - "!pip install statsmodels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KuhAiPfZ3rJW" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from IPython.display import clear_output\n", - "\n", - "# Load dataset.\n", - "dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')\n", - "dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')\n", - "y_train = dftrain.pop('survived')\n", - "y_eval = dfeval.pop('survived')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sp1ShjJJeyH3" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "tf.random.set_seed(123)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ioodHdVJVdA" - }, - "source": [ - "For a description of the features, please review the prior tutorial." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krkRHuMp3rJn" - }, - "source": [ - "## Create feature columns, input_fn, and the train the estimator" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JiJ6K3hr1lXW" - }, - "source": [ - "### Preprocess the data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "udMytRJC05oW" - }, - "source": [ - "Create the feature columns, using the original numeric columns as is and one-hot-encoding categorical variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "upaNWxcF3rJn" - }, - "outputs": [], - "source": [ - "fc = tf.feature_column\n", - "CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',\n", - " 'embark_town', 'alone']\n", - "NUMERIC_COLUMNS = ['age', 'fare']\n", - "\n", - "def one_hot_cat_column(feature_name, vocab):\n", - " return fc.indicator_column(\n", - " fc.categorical_column_with_vocabulary_list(feature_name,\n", - " vocab))\n", - "feature_columns = []\n", - "for feature_name in CATEGORICAL_COLUMNS:\n", - " # Need to one-hot encode categorical features.\n", - " vocabulary = dftrain[feature_name].unique()\n", - " feature_columns.append(one_hot_cat_column(feature_name, vocabulary))\n", - "\n", - "for feature_name in NUMERIC_COLUMNS:\n", - " feature_columns.append(fc.numeric_column(feature_name,\n", - " dtype=tf.float32))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9rTefnXe1n0v" - }, - "source": [ - "### Build the input pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-UOlROp33rJo" - }, - "source": [ - "Create the input functions using the `from_tensor_slices` method in the [`tf.data`](https://www.tensorflow.org/api_docs/python/tf/data) API to read in data directly from Pandas." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9dquwCQB3rJp" - }, - "outputs": [], - "source": [ - "# Use entire batch since this is such a small dataset.\n", - "NUM_EXAMPLES = len(y_train)\n", - "\n", - "def make_input_fn(X, y, n_epochs=None, shuffle=True):\n", - " def input_fn():\n", - " dataset = tf.data.Dataset.from_tensor_slices((X.to_dict(orient='list'), y))\n", - " if shuffle:\n", - " dataset = dataset.shuffle(NUM_EXAMPLES)\n", - " # For training, cycle thru dataset as many times as need (n_epochs=None).\n", - " dataset = (dataset\n", - " .repeat(n_epochs)\n", - " .batch(NUM_EXAMPLES))\n", - " return dataset\n", - " return input_fn\n", - "\n", - "# Training and evaluation input functions.\n", - "train_input_fn = make_input_fn(dftrain, y_train)\n", - "eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HttfNNlN3rJr" - }, - "source": [ - "### Train the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tgEzMtlw3rJu" - }, - "outputs": [], - "source": [ - "params = {\n", - " 'n_trees': 50,\n", - " 'max_depth': 3,\n", - " 'n_batches_per_layer': 1,\n", - " # You must enable center_bias = True to get DFCs. This will force the model to\n", - " # make an initial prediction before using any features (e.g. 
use the mean of\n", - " # the training labels for regression or log odds for classification when\n", - " # using cross entropy loss).\n", - " 'center_bias': True\n", - "}\n", - "\n", - "est = tf.estimator.BoostedTreesClassifier(feature_columns, **params)\n", - "# Train model.\n", - "est.train(train_input_fn, max_steps=100)\n", - "\n", - "# Evaluation.\n", - "results = est.evaluate(eval_input_fn)\n", - "clear_output()\n", - "pd.Series(results).to_frame()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JgAz3jDa_tRA" - }, - "source": [ - "For performance reasons, when your data fits in memory, we recommend use the arg `train_in_memory=True` in the `tf.estimator.BoostedTreesClassifier` function. However if training time is not of a concern or if you have a very large dataset and want to do distributed training, use the `tf.estimator.BoostedTrees` API shown above.\n", - "\n", - "\n", - "When using this method, you should not batch your input data, as the method operates on the entire dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "y7ztzoSk_vjY" - }, - "outputs": [], - "source": [ - "in_memory_params = dict(params)\n", - "in_memory_params['n_batches_per_layer'] = 1\n", - "# In-memory input_fn does not use batching.\n", - "def make_inmemory_train_input_fn(X, y):\n", - " y = np.expand_dims(y, axis=1)\n", - " def input_fn():\n", - " return dict(X), y\n", - " return input_fn\n", - "train_input_fn = make_inmemory_train_input_fn(dftrain, y_train)\n", - "\n", - "# Train the model.\n", - "est = tf.estimator.BoostedTreesClassifier(\n", - " feature_columns, \n", - " train_in_memory=True, \n", - " **in_memory_params)\n", - "\n", - "est.train(train_input_fn)\n", - "print(est.evaluate(eval_input_fn))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TSZYqNcRuczV" - }, - "source": [ - "## Model interpretation and plotting" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BjcfLiI3uczW" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "sns_colors = sns.color_palette('colorblind')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ywTtbBvBuczY" - }, - "source": [ - "## Local interpretability\n", - "Next you will output the directional feature contributions (DFCs) to explain individual predictions using the approach outlined in [Palczewska et al](https://arxiv.org/pdf/1312.1121.pdf) and by Saabas in [Interpreting Random Forests](http://blog.datadive.net/interpreting-random-forests/) (this method is also available in scikit-learn for Random Forests in the [`treeinterpreter`](https://github.com/andosa/treeinterpreter) package). 
The DFCs are generated with:\n", - "\n", - "`pred_dicts = list(est.experimental_predict_with_explanations(pred_input_fn))`\n", - "\n", - "(Note: The method is named experimental as we may modify the API before dropping the experimental prefix.)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TIL93B4sDRqE" - }, - "outputs": [], - "source": [ - "pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tDPoRx_ZaY1E" - }, - "outputs": [], - "source": [ - "# Create DFC Pandas dataframe.\n", - "labels = y_eval.values\n", - "probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])\n", - "df_dfc = pd.DataFrame([pred['dfc'] for pred in pred_dicts])\n", - "df_dfc.describe().T" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EUKSaVoraY1C" - }, - "source": [ - "A nice property of DFCs is that the sum of the contributions + the bias is equal to the prediction for a given example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hd9VuizRaY1H" - }, - "outputs": [], - "source": [ - "# Sum of DFCs + bias == probabality.\n", - "bias = pred_dicts[0]['bias']\n", - "dfc_prob = df_dfc.sum(axis=1) + bias\n", - "np.testing.assert_almost_equal(dfc_prob.values,\n", - " probs.values)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tx5p4vEhuczg" - }, - "source": [ - "Plot DFCs for an individual passenger. Let's make the plot nice by color coding based on the contributions' directionality and add the feature values on figure." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6z_Tq1Pquczj" - }, - "outputs": [], - "source": [ - "# Boilerplate code for plotting :)\n", - "def _get_color(value):\n", - " \"\"\"To make positive DFCs plot green, negative DFCs plot red.\"\"\"\n", - " green, red = sns.color_palette()[2:4]\n", - " if value >= 0: return green\n", - " return red\n", - "\n", - "def _add_feature_values(feature_values, ax):\n", - " \"\"\"Display feature's values on left of plot.\"\"\"\n", - " x_coord = ax.get_xlim()[0]\n", - " OFFSET = 0.15\n", - " for y_coord, (feat_name, feat_val) in enumerate(feature_values.items()):\n", - " t = plt.text(x_coord, y_coord - OFFSET, '{}'.format(feat_val), size=12)\n", - " t.set_bbox(dict(facecolor='white', alpha=0.5))\n", - " from matplotlib.font_manager import FontProperties\n", - " font = FontProperties()\n", - " font.set_weight('bold')\n", - " t = plt.text(x_coord, y_coord + 1 - OFFSET, 'feature\\nvalue',\n", - " fontproperties=font, size=12)\n", - "\n", - "def plot_example(example):\n", - " TOP_N = 8 # View top 8 features.\n", - " sorted_ix = example.abs().sort_values()[-TOP_N:].index # Sort by magnitude.\n", - " example = example[sorted_ix]\n", - " colors = example.map(_get_color).tolist()\n", - " ax = example.to_frame().plot(kind='barh',\n", - " color=colors,\n", - " legend=None,\n", - " alpha=0.75,\n", - " figsize=(10,6))\n", - " ax.grid(False, axis='y')\n", - " ax.set_yticklabels(ax.get_yticklabels(), size=14)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - " return ax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ht1P2-1euczk" - }, - "outputs": [], - "source": [ - "# Plot results.\n", - "ID = 182\n", - "example = df_dfc.iloc[ID] # Choose ith example from evaluation set.\n", - "TOP_N = 8 # View top 8 
features.\n", - "sorted_ix = example.abs().sort_values()[-TOP_N:].index\n", - "ax = plot_example(example)\n", - "ax.set_title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "ax.set_xlabel('Contribution to predicted probability', size=14)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aPXgWyFcfzAc" - }, - "source": [ - "The larger magnitude contributions have a larger impact on the model's prediction. Negative contributions indicate the feature value for this given example reduced the model's prediction, while positive values contribute an increase in the prediction." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0swvlkZFaY1Z" - }, - "source": [ - "You can also plot the example's DFCs compare with the entire distribution using a voilin plot." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zo7rNd1v_5e2" - }, - "outputs": [], - "source": [ - "# Boilerplate plotting code.\n", - "def dist_violin_plot(df_dfc, ID):\n", - " # Initialize plot.\n", - " fig, ax = plt.subplots(1, 1, figsize=(10, 6))\n", - "\n", - " # Create example dataframe.\n", - " TOP_N = 8 # View top 8 features.\n", - " example = df_dfc.iloc[ID]\n", - " ix = example.abs().sort_values()[-TOP_N:].index\n", - " example = example[ix]\n", - " example_df = example.to_frame(name='dfc')\n", - "\n", - " # Add contributions of entire distribution.\n", - " parts=ax.violinplot([df_dfc[w] for w in ix],\n", - " vert=False,\n", - " showextrema=False,\n", - " widths=0.7,\n", - " positions=np.arange(len(ix)))\n", - " face_color = sns_colors[0]\n", - " alpha = 0.15\n", - " for pc in parts['bodies']:\n", - " pc.set_facecolor(face_color)\n", - " pc.set_alpha(alpha)\n", - "\n", - " # Add feature values.\n", - " _add_feature_values(dfeval.iloc[ID][sorted_ix], ax)\n", - "\n", - " # Add local contributions.\n", - " ax.scatter(example,\n", - " np.arange(example.shape[0]),\n", - " color=sns.color_palette()[2],\n", - " s=100,\n", - " marker=\"s\",\n", - " label='contributions for example')\n", - "\n", - " # Legend\n", - " # Proxy plot, to show violinplot dist on legend.\n", - " ax.plot([0,0], [1,1], label='eval set contributions\\ndistributions',\n", - " color=face_color, alpha=alpha, linewidth=10)\n", - " legend = ax.legend(loc='lower right', shadow=True, fontsize='x-large',\n", - " frameon=True)\n", - " legend.get_frame().set_facecolor('white')\n", - "\n", - " # Format plot.\n", - " ax.set_yticks(np.arange(example.shape[0]))\n", - " ax.set_yticklabels(example.index)\n", - " ax.grid(False, axis='y')\n", - " ax.set_xlabel('Contribution to predicted probability', size=14)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PiLw2tlm_9aK" - }, - "source": [ - "Plot this example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VkCqraA2uczm" - }, - "outputs": [], - "source": [ - "dist_violin_plot(df_dfc, ID)\n", - "plt.title('Feature contributions for example {}\\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID]))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TVJFM85SAWVq" - }, - "source": [ - "Finally, third-party tools, such as [LIME](https://github.com/marcotcr/lime) and [shap](https://github.com/slundberg/shap), can also help understand individual predictions for a model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PnNXH6mZuczr" - }, - "source": [ - "## Global feature importances\n", - "\n", - "Additionally, you might want to understand the model as a whole, rather than studying individual predictions. Below, you will compute and use:\n", - "\n", - "* Gain-based feature importances using `est.experimental_feature_importances`\n", - "* Permutation importances\n", - "* Aggregate DFCs using `est.experimental_predict_with_explanations`\n", - "\n", - "Gain-based feature importances measure the loss change when splitting on a particular feature, while permutation feature importances are computed by evaluating model performance on the evaluation set by shuffling each feature one-by-one and attributing the change in model performance to the shuffled feature.\n", - "\n", - "In general, permutation feature importance are preferred to gain-based feature importance, though both methods can be unreliable in situations where potential predictor variables vary in their scale of measurement or their number of categories and when features are correlated ([source](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-307)). Check out [this article](http://explained.ai/rf-importance/index.html) for an in-depth overview and great discussion on different feature importance types." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3ocBcMatuczs" - }, - "source": [ - "### Gain-based feature importances" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gMaxCgPbBJ-j" - }, - "source": [ - "Gain-based feature importances are built into the TensorFlow Boosted Trees estimators using `est.experimental_feature_importances`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pPTxbAaeuczt" - }, - "outputs": [], - "source": [ - "importances = est.experimental_feature_importances(normalize=True)\n", - "df_imp = pd.Series(importances)\n", - "\n", - "# Visualize importances.\n", - "N = 8\n", - "ax = (df_imp.iloc[0:N][::-1]\n", - " .plot(kind='barh',\n", - " color=sns_colors[0],\n", - " title='Gain feature importances',\n", - " figsize=(10, 6)))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GvfAcBeGuczw" - }, - "source": [ - "### Average absolute DFCs\n", - "You can also average the absolute values of DFCs to understand impact at a global level." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JkvAWLWLuczx" - }, - "outputs": [], - "source": [ - "# Plot.\n", - "dfc_mean = df_dfc.abs().mean()\n", - "N = 8\n", - "sorted_ix = dfc_mean.abs().sort_values()[-N:].index # Average and sort by absolute.\n", - "ax = dfc_mean[sorted_ix].plot(kind='barh',\n", - " color=sns_colors[1],\n", - " title='Mean |directional feature contributions|',\n", - " figsize=(10, 6))\n", - "ax.grid(False, axis='y')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z0k_DvPLaY1o" - }, - "source": [ - "You can also see how DFCs vary as a feature value varies." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZcIfN1IpaY1o" - }, - "outputs": [], - "source": [ - "FEATURE = 'fare'\n", - "feature = pd.Series(df_dfc[FEATURE].values, index=dfeval[FEATURE].values).sort_index()\n", - "ax = sns.regplot(feature.index.values, feature.values, lowess=True)\n", - "ax.set_ylabel('contribution')\n", - "ax.set_xlabel(FEATURE)\n", - "ax.set_xlim(0, 100)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lbpG72ULucz0" - }, - "source": [ - "### Permutation feature importance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6esOw1VOucz0" - }, - "outputs": [], - "source": [ - "def permutation_importances(est, X_eval, y_eval, metric, features):\n", - " \"\"\"Column by column, shuffle values and observe effect on eval set.\n", - "\n", - " source: http://explained.ai/rf-importance/index.html\n", - " A similar approach can be done during training. See \"Drop-column importance\"\n", - " in the above article.\"\"\"\n", - " baseline = metric(est, X_eval, y_eval)\n", - " imp = []\n", - " for col in features:\n", - " save = X_eval[col].copy()\n", - " X_eval[col] = np.random.permutation(X_eval[col])\n", - " m = metric(est, X_eval, y_eval)\n", - " X_eval[col] = save\n", - " imp.append(baseline - m)\n", - " return np.array(imp)\n", - "\n", - "def accuracy_metric(est, X, y):\n", - " \"\"\"TensorFlow estimator accuracy.\"\"\"\n", - " eval_input_fn = make_input_fn(X,\n", - " y=y,\n", - " shuffle=False,\n", - " n_epochs=1)\n", - " return est.evaluate(input_fn=eval_input_fn)['accuracy']\n", - "features = CATEGORICAL_COLUMNS + NUMERIC_COLUMNS\n", - "importances = permutation_importances(est, dfeval, y_eval, accuracy_metric,\n", - " features)\n", - "df_imp = pd.Series(importances, index=features)\n", - "\n", - "sorted_ix = df_imp.abs().sort_values().index\n", - "ax = df_imp[sorted_ix][-5:].plot(kind='barh', color=sns_colors[2], figsize=(10, 6))\n", - "ax.grid(False, axis='y')\n", - "ax.set_title('Permutation feature importance')\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E236y3pVEzHg" - }, - "source": [ - "## Visualizing model fitting" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TrcQ-839EzZ6" - }, - "source": [ - "Lets first simulate/create training data using the following formula:\n", - "\n", - "\n", - "$$z=x* e^{-x^2 - y^2}$$\n", - "\n", - "\n", - "Where \\(z\\) is the dependent variable you are trying to predict and \\(x\\) and \\(y\\) are the features." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e8woaj81GGE9" - }, - "outputs": [], - "source": [ - "from numpy.random import uniform, seed\n", - "from scipy.interpolate import griddata\n", - "\n", - "# Create fake data\n", - "seed(0)\n", - "npts = 5000\n", - "x = uniform(-2, 2, npts)\n", - "y = uniform(-2, 2, npts)\n", - "z = x*np.exp(-x**2 - y**2)\n", - "xy = np.zeros((2,np.size(x)))\n", - "xy[0] = x\n", - "xy[1] = y\n", - "xy = xy.T" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GRI3KHfLZsGP" - }, - "outputs": [], - "source": [ - "# Prep data for training.\n", - "df = pd.DataFrame({'x': x, 'y': y, 'z': z})\n", - "\n", - "xi = np.linspace(-2.0, 2.0, 200),\n", - "yi = np.linspace(-2.1, 2.1, 210),\n", - "xi,yi = np.meshgrid(xi, yi)\n", - "\n", - "df_predict = pd.DataFrame({\n", - " 'x' : xi.flatten(),\n", - " 'y' : yi.flatten(),\n", - "})\n", - "predict_shape = xi.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w0JnH4IhZuAb" - }, - "outputs": [], - "source": [ - "def plot_contour(x, y, z, **kwargs):\n", - " # Grid the data.\n", - " plt.figure(figsize=(10, 8))\n", - " # Contour the gridded data, plotting dots at the nonuniform data points.\n", - " CS = plt.contour(x, y, z, 15, linewidths=0.5, colors='k')\n", - " CS = plt.contourf(x, y, z, 15,\n", - " vmax=abs(zi).max(), vmin=-abs(zi).max(), cmap='RdBu_r')\n", - " plt.colorbar() # Draw colorbar.\n", - " # Plot data points.\n", - " plt.xlim(-2, 2)\n", - " plt.ylim(-2, 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KF7WsIcYGF_E" - }, - "source": [ - "You can visualize the function. Redder colors correspond to larger function values." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WrxuqaaXGFOK" - }, - "outputs": [], - "source": [ - "zi = griddata(xy, z, (xi, yi), method='linear', fill_value='0')\n", - "plot_contour(xi, yi, zi)\n", - "plt.scatter(df.x, df.y, marker='.')\n", - "plt.title('Contour on training data')\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hoANr0f2GFrM" - }, - "outputs": [], - "source": [ - "fc = [tf.feature_column.numeric_column('x'),\n", - " tf.feature_column.numeric_column('y')]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xVRWyoY3ayTK" - }, - "outputs": [], - "source": [ - "def predict(est):\n", - " \"\"\"Predictions from a given estimator.\"\"\"\n", - " predict_input_fn = lambda: tf.data.Dataset.from_tensors(dict(df_predict))\n", - " preds = np.array([p['predictions'][0] for p in est.predict(predict_input_fn)])\n", - " return preds.reshape(predict_shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uyPu5618GU7K" - }, - "source": [ - "First let's try to fit a linear model to the data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zUIV2IVgGVSk" - }, - "outputs": [], - "source": [ - "train_input_fn = make_input_fn(df, df.z)\n", - "est = tf.estimator.LinearRegressor(fc)\n", - "est.train(train_input_fn, max_steps=500);" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_u4WAcCqfbco" - }, - "outputs": [], - "source": [ - "plot_contour(xi, yi, predict(est))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XD_fMAUtSCSa" - }, - "source": [ - "It's not a very good fit. 
Next let's try to fit a GBDT model to it and try to understand how the model fits the function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-dHlKFlFgHDQ" - }, - "outputs": [], - "source": [ - "n_trees = 37 #@param {type: \"slider\", min: 1, max: 80, step: 1}\n", - "\n", - "est = tf.estimator.BoostedTreesRegressor(fc, n_batches_per_layer=1, n_trees=n_trees)\n", - "est.train(train_input_fn, max_steps=500)\n", - "clear_output()\n", - "plot_contour(xi, yi, predict(est))\n", - "plt.text(-1.8, 2.1, '# trees: {}'.format(n_trees), color='w', backgroundcolor='black', size=20)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5WcZ9fubh1wT" - }, - "source": [ - "As you increase the number of trees, the model's predictions better approximates the underlying function." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cj8u3NCG-IKX" - }, - "source": [ - "![](https://www.tensorflow.org/images/boosted_trees/boosted_trees_ntrees.gif)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SMKoEZnCdrsp" - }, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZSZUSrjXdw9g" - }, - "source": [ - "In this tutorial you learned how to interpret Boosted Trees models using directional feature contributions and feature importance techniques. These techniques provide insight into how the features impact a model's predictions. Finally, you also gained intution for how a Boosted Tree model fits a complex function by viewing the decision surface for several models." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "boosted_trees_model_understanding.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/site/en/tutorials/estimator/keras_model_to_estimator.ipynb b/site/en/tutorials/estimator/keras_model_to_estimator.ipynb index e41380908f5..be97a38b6eb 100644 --- a/site/en/tutorials/estimator/keras_model_to_estimator.ipynb +++ b/site/en/tutorials/estimator/keras_model_to_estimator.ipynb @@ -68,7 +68,7 @@ "id": "Dhcq8Ds4mCtm" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." 
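The updated warning points readers at the migration guide rather than describing the change. As a rough, illustrative sketch (not taken from this notebook), converting off `tf.keras.estimator.model_to_estimator` usually amounts to compiling the Keras model and calling `Model.fit` directly:

```python
import tensorflow as tf

# A small Keras model, similar in spirit to the one this tutorial wraps.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(3),
])
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])

# Estimator style (deprecated):
#   estimator = tf.keras.estimator.model_to_estimator(keras_model=model)
#   estimator.train(input_fn=train_input_fn, steps=100)
# Keras style: feed a tf.data.Dataset straight to Model.fit.
features = tf.random.normal([32, 4])
labels = tf.random.uniform([32], maxval=3, dtype=tf.int32)
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(8)
model.fit(dataset, epochs=1)
```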
] }, { @@ -272,8 +272,7 @@ "colab": { "collapsed_sections": [], "name": "keras_model_to_estimator.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/estimator/linear.ipynb b/site/en/tutorials/estimator/linear.ipynb index ea46d41ede1..a26ffe2df4f 100644 --- a/site/en/tutorials/estimator/linear.ipynb +++ b/site/en/tutorials/estimator/linear.ipynb @@ -61,7 +61,7 @@ "id": "JOccPOFMm5Tc" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { @@ -293,14 +293,31 @@ "pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive')" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "qCHvgeorEsHa" + }, + "source": [ + "## Feature Engineering for the Model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dhcq8Ds4mCtm" + }, + "source": [ + "> Warning: The tf.feature_columns module described in this tutorial is not recommended for new code. Keras preprocessing layers cover this functionality, for migration instructions see the [Migrating feature columns guide](https://www.tensorflow.org/guide/migrate/migrating_feature_columns). The tf.feature_columns module was designed for use with TF1 Estimators. It does fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities." + ] + }, { "cell_type": "markdown", "metadata": { "id": "VqDKQLZn8L-B" }, "source": [ - "## Feature Engineering for the Model\n", - "Estimators use a system called [feature columns](https://www.tensorflow.org/guide/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and *feature columns* describe how the model should convert each feature.\n", + "Estimators use a system called [feature columns](https://www.tensorflow.org/tutorials/structured_data/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and *feature columns* describe how the model should convert each feature.\n", "\n", "Selecting and crafting the right set of feature columns is key to learning an effective model. 
A feature column can be either one of the raw inputs in the original features `dict` (a *base feature column*), or any new columns created using transformations defined over one or multiple base columns (a *derived feature columns*).\n", "\n", @@ -583,8 +600,7 @@ "colab": { "collapsed_sections": [], "name": "linear.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/estimator/premade.ipynb b/site/en/tutorials/estimator/premade.ipynb index a34096ea2b8..dc81847c7cd 100644 --- a/site/en/tutorials/estimator/premade.ipynb +++ b/site/en/tutorials/estimator/premade.ipynb @@ -68,7 +68,7 @@ "id": "stQiPWL6ni6_" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { diff --git a/site/en/tutorials/generative/autoencoder.ipynb b/site/en/tutorials/generative/autoencoder.ipynb index d2af1c3a345..1b2a6fcd2a8 100644 --- a/site/en/tutorials/generative/autoencoder.ipynb +++ b/site/en/tutorials/generative/autoencoder.ipynb @@ -6,9 +6,16 @@ "id": "Ndo4ERqnwQOU" }, "source": [ - "##### Copyright 2020 The TensorFlow Authors." + "##### Copyright 2024 The TensorFlow Authors." ] }, + { + "metadata": { + "id": "13rwRG5Jec7n" + }, + "cell_type": "markdown", + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -76,7 +83,7 @@ "source": [ "This tutorial introduces autoencoders with three examples: the basics, image denoising, and anomaly detection.\n", "\n", - "An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image. An autoencoder learns to compress the data while minimizing the reconstruction error. \n", + "An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image. An autoencoder learns to compress the data while minimizing the reconstruction error.\n", "\n", "To learn more about autoencoders, please consider reading chapter 14 from [Deep Learning](https://www.deeplearningbook.org/) by Ian Goodfellow, Yoshua Bengio, and Aaron Courville." ] @@ -117,7 +124,7 @@ }, "source": [ "## Load the dataset\n", - "To start, you will train the basic autoencoder using the Fashon MNIST dataset. Each image in this dataset is 28x28 pixels. " + "To start, you will train the basic autoencoder using the Fashion MNIST dataset. Each image in this dataset is 28x28 pixels." 
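The cell that loads the data is unchanged and therefore absent from this diff. For context, it is essentially the standard Keras dataset call; this is a sketch matching the usual tutorial setup rather than a quote of the notebook.

```python
import tensorflow as tf

# Download Fashion MNIST and scale pixels to [0, 1]. The labels are discarded
# because an autoencoder reconstructs its own input.
(x_train, _), (x_test, _) = tf.keras.datasets.fashion_mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
print(x_train.shape)  # (60000, 28, 28)
print(x_test.shape)   # (10000, 28, 28)
```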
] }, { @@ -159,27 +166,29 @@ }, "outputs": [], "source": [ - "latent_dim = 64 \n", - "\n", "class Autoencoder(Model):\n", - " def __init__(self, latent_dim):\n", + " def __init__(self, latent_dim, shape):\n", " super(Autoencoder, self).__init__()\n", - " self.latent_dim = latent_dim \n", + " self.latent_dim = latent_dim\n", + " self.shape = shape\n", " self.encoder = tf.keras.Sequential([\n", " layers.Flatten(),\n", " layers.Dense(latent_dim, activation='relu'),\n", " ])\n", " self.decoder = tf.keras.Sequential([\n", - " layers.Dense(784, activation='sigmoid'),\n", - " layers.Reshape((28, 28))\n", + " layers.Dense(tf.math.reduce_prod(shape).numpy(), activation='sigmoid'),\n", + " layers.Reshape(shape)\n", " ])\n", "\n", " def call(self, x):\n", " encoded = self.encoder(x)\n", " decoded = self.decoder(encoded)\n", " return decoded\n", - " \n", - "autoencoder = Autoencoder(latent_dim) " + "\n", + "\n", + "shape = x_test.shape[1:]\n", + "latent_dim = 64\n", + "autoencoder = Autoencoder(latent_dim, shape)\n" ] }, { @@ -329,8 +338,8 @@ "outputs": [], "source": [ "noise_factor = 0.2\n", - "x_train_noisy = x_train + noise_factor * tf.random.normal(shape=x_train.shape) \n", - "x_test_noisy = x_test + noise_factor * tf.random.normal(shape=x_test.shape) \n", + "x_train_noisy = x_train + noise_factor * tf.random.normal(shape=x_train.shape)\n", + "x_test_noisy = x_test + noise_factor * tf.random.normal(shape=x_test.shape)\n", "\n", "x_train_noisy = tf.clip_by_value(x_train_noisy, clip_value_min=0., clip_value_max=1.)\n", "x_test_noisy = tf.clip_by_value(x_test_noisy, clip_value_min=0., clip_value_max=1.)" @@ -492,7 +501,7 @@ }, "outputs": [], "source": [ - "encoded_imgs = autoencoder.encoder(x_test).numpy()\n", + "encoded_imgs = autoencoder.encoder(x_test_noisy).numpy()\n", "decoded_imgs = autoencoder.decoder(encoded_imgs).numpy()" ] }, @@ -655,7 +664,7 @@ "id": "wVcTBDo-CqFS" }, "source": [ - "Plot a normal ECG. " + "Plot a normal ECG." ] }, { @@ -719,12 +728,12 @@ " layers.Dense(32, activation=\"relu\"),\n", " layers.Dense(16, activation=\"relu\"),\n", " layers.Dense(8, activation=\"relu\")])\n", - " \n", + "\n", " self.decoder = tf.keras.Sequential([\n", " layers.Dense(16, activation=\"relu\"),\n", " layers.Dense(32, activation=\"relu\"),\n", " layers.Dense(140, activation=\"sigmoid\")])\n", - " \n", + "\n", " def call(self, x):\n", " encoded = self.encoder(x)\n", " decoded = self.decoder(encoded)\n", @@ -761,8 +770,8 @@ }, "outputs": [], "source": [ - "history = autoencoder.fit(normal_train_data, normal_train_data, \n", - " epochs=20, \n", + "history = autoencoder.fit(normal_train_data, normal_train_data,\n", + " epochs=20,\n", " batch_size=512,\n", " validation_data=(test_data, test_data),\n", " shuffle=True)" @@ -906,7 +915,7 @@ "id": "uEGlA1Be50Nj" }, "source": [ - "Note: There are other strategies you could use to select a threshold value above which test examples should be classified as anomalous, the correct approach will depend on your dataset. You can learn more with the links at the end of this tutorial. " + "Note: There are other strategies you could use to select a threshold value above which test examples should be classified as anomalous, the correct approach will depend on your dataset. You can learn more with the links at the end of this tutorial." ] }, { @@ -915,7 +924,7 @@ "id": "zpLSDAeb51D_" }, "source": [ - "If you examine the reconstruction error for the anomalous examples in the test set, you'll notice most have greater reconstruction error than the threshold. 
By varing the threshold, you can adjust the [precision](https://developers.google.com/machine-learning/glossary#precision) and [recall](https://developers.google.com/machine-learning/glossary#recall) of your classifier. " + "If you examine the reconstruction error for the anomalous examples in the test set, you'll notice most have greater reconstruction error than the threshold. By varing the threshold, you can adjust the [precision](https://developers.google.com/machine-learning/glossary#precision) and [recall](https://developers.google.com/machine-learning/glossary#recall) of your classifier." ] }, { @@ -990,8 +999,18 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], - "name": "autoencoder.ipynb", + "gpuType": "T4", + "private_outputs": true, + "provenance": [ + { + "file_id": "17gKB2bKebV2DzoYIMFzyEXA5uDnwWOvT", + "timestamp": 1712793165979 + }, + { + "file_id": "https://github.com/tensorflow/docs/blob/master/site/en/tutorials/generative/autoencoder.ipynb", + "timestamp": 1712792176273 + } + ], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/generative/cyclegan.ipynb b/site/en/tutorials/generative/cyclegan.ipynb index 7136dd143ef..313be519591 100644 --- a/site/en/tutorials/generative/cyclegan.ipynb +++ b/site/en/tutorials/generative/cyclegan.ipynb @@ -154,7 +154,7 @@ "This is similar to what was done in [pix2pix](https://www.tensorflow.org/tutorials/generative/pix2pix#load_the_dataset)\n", "\n", "* In random jittering, the image is resized to `286 x 286` and then randomly cropped to `256 x 256`.\n", - "* In random mirroring, the image is randomly flipped horizontally i.e left to right." + "* In random mirroring, the image is randomly flipped horizontally i.e., left to right." ] }, { @@ -634,7 +634,7 @@ "source": [ "## Training\n", "\n", - "Note: This example model is trained for fewer epochs (40) than the paper (200) to keep training time reasonable for this tutorial. Predictions may be less accurate. " + "Note: This example model is trained for fewer epochs (10) than the paper (200) to keep training time reasonable for this tutorial. The generated images will have much lower quality." ] }, { @@ -645,7 +645,7 @@ }, "outputs": [], "source": [ - "EPOCHS = 40" + "EPOCHS = 10" ] }, { @@ -830,8 +830,7 @@ "colab": { "collapsed_sections": [], "name": "cyclegan.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/generative/data_compression.ipynb b/site/en/tutorials/generative/data_compression.ipynb new file mode 100644 index 00000000000..f756f088acd --- /dev/null +++ b/site/en/tutorials/generative/data_compression.ipynb @@ -0,0 +1,901 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Compression Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# Learned data compression" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + " \n", + " View on TensorFlow.org\n", + " \n", + " \n", + " \n", + " Run in Google Colab\n", + " \n", + " \n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xHxb-dlhMIzW" + }, + "source": [ + "## Overview\n", + "\n", + "This notebook shows how to do lossy data compression using neural networks and [TensorFlow Compression](https://github.com/tensorflow/compression).\n", + "\n", + "Lossy compression involves making a trade-off between **rate**, the expected number of bits needed to encode a sample, and **distortion**, the expected error in the reconstruction of the sample.\n", + "\n", + "The examples below use an autoencoder-like model to compress images from the MNIST dataset. The method is based on the paper [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704).\n", + "\n", + "More background on learned data compression can be found in [this paper](https://arxiv.org/abs/2007.03034) targeted at people familiar with classical data compression, or [this survey](https://arxiv.org/abs/2202.06533) targeted at a machine learning audience.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MUXex9ctTuDB" + }, + "source": [ + "## Setup\n", + "\n", + "Install Tensorflow Compression via `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K489KsEgxuLI" + }, + "outputs": [], + "source": [ + "%%bash\n", + "# Installs the latest version of TFC compatible with the installed TF version.\n", + "\n", + "read MAJOR MINOR <<< \"$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d+)\\.(\\d+).*/\\1 \\2/sg')\"\n", + "pip install \"tensorflow-compression<$MAJOR.$(($MINOR+1))\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WfVAmHCVxpTS" + }, + "source": [ + "Import library dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IqR2PQG4ZaZ0" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "import tensorflow_compression as tfc\n", + "import tensorflow_datasets as tfds\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wsncKT2iymgQ" + }, + "source": [ + "## Define the trainer model.\n", + "\n", + "Because the model resembles an autoencoder, and we need to perform a different set of functions during training and inference, the setup is a little different from, say, a classifier.\n", + "\n", + "The training model consists of three parts:\n", + "- the **analysis** (or encoder) transform, converting from the image into a latent space,\n", + "- the **synthesis** (or decoder) transform, converting from the latent space back into image space, and\n", + "- a **prior** and entropy model, modeling the marginal probabilities of the latents.\n", + "\n", + "First, define the transforms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8yZESLgW-vp1" + }, + "outputs": [], + "source": [ + "def make_analysis_transform(latent_dims):\n", + " \"\"\"Creates the analysis (encoder) transform.\"\"\"\n", + " return tf.keras.Sequential([\n", + " tf.keras.layers.Conv2D(\n", + " 20, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_1\"),\n", + " tf.keras.layers.Conv2D(\n", + " 50, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_2\"),\n", + " tf.keras.layers.Flatten(),\n", + " tf.keras.layers.Dense(\n", + " 500, use_bias=True, activation=\"leaky_relu\", name=\"fc_1\"),\n", + " tf.keras.layers.Dense(\n", + " latent_dims, use_bias=True, activation=None, 
name=\"fc_2\"),\n", + " ], name=\"analysis_transform\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2sHdYBzF2xcu" + }, + "outputs": [], + "source": [ + "def make_synthesis_transform():\n", + " \"\"\"Creates the synthesis (decoder) transform.\"\"\"\n", + " return tf.keras.Sequential([\n", + " tf.keras.layers.Dense(\n", + " 500, use_bias=True, activation=\"leaky_relu\", name=\"fc_1\"),\n", + " tf.keras.layers.Dense(\n", + " 2450, use_bias=True, activation=\"leaky_relu\", name=\"fc_2\"),\n", + " tf.keras.layers.Reshape((7, 7, 50)),\n", + " tf.keras.layers.Conv2DTranspose(\n", + " 20, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_1\"),\n", + " tf.keras.layers.Conv2DTranspose(\n", + " 1, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_2\"),\n", + " ], name=\"synthesis_transform\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lYC8tHhkxTlK" + }, + "source": [ + "The trainer holds an instance of both transforms, as well as the parameters of the prior.\n", + "\n", + "Its `call` method is set up to compute:\n", + "- **rate**, an estimate of the number of bits needed to represent the batch of digits, and\n", + "- **distortion**, the mean absolute difference between the pixels of the original digits and their reconstructions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ROn2DbzsBirI" + }, + "outputs": [], + "source": [ + "class MNISTCompressionTrainer(tf.keras.Model):\n", + " \"\"\"Model that trains a compressor/decompressor for MNIST.\"\"\"\n", + "\n", + " def __init__(self, latent_dims):\n", + " super().__init__()\n", + " self.analysis_transform = make_analysis_transform(latent_dims)\n", + " self.synthesis_transform = make_synthesis_transform()\n", + " self.prior_log_scales = tf.Variable(tf.zeros((latent_dims,)))\n", + "\n", + " @property\n", + " def prior(self):\n", + " return tfc.NoisyLogistic(loc=0., scale=tf.exp(self.prior_log_scales))\n", + "\n", + " def call(self, x, training):\n", + " \"\"\"Computes rate and distortion losses.\"\"\"\n", + " # Ensure inputs are floats in the range (0, 1).\n", + " x = tf.cast(x, self.compute_dtype) / 255.\n", + " x = tf.reshape(x, (-1, 28, 28, 1))\n", + "\n", + " # Compute latent space representation y, perturb it and model its entropy,\n", + " # then compute the reconstructed pixel-level representation x_hat.\n", + " y = self.analysis_transform(x)\n", + " entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " self.prior, coding_rank=1, compression=False)\n", + " y_tilde, rate = entropy_model(y, training=training)\n", + " x_tilde = self.synthesis_transform(y_tilde)\n", + "\n", + " # Average number of bits per MNIST digit.\n", + " rate = tf.reduce_mean(rate)\n", + "\n", + " # Mean absolute difference across pixels.\n", + " distortion = tf.reduce_mean(abs(x - x_tilde))\n", + "\n", + " return dict(rate=rate, distortion=distortion)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vEXbp9RV3kRX" + }, + "source": [ + "### Compute rate and distortion.\n", + "\n", + "Let's walk through this step by step, using one image from the training set. 
Load the MNIST dataset for training and validation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FV99WTrIBen" + }, + "outputs": [], + "source": [ + "training_dataset, validation_dataset = tfds.load(\n", + " \"mnist\",\n", + " split=[\"train\", \"test\"],\n", + " shuffle_files=True,\n", + " as_supervised=True,\n", + " with_info=False,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SwKgNTg_QfjH" + }, + "source": [ + "And extract one image $x$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O-BSdeHcPBBf" + }, + "outputs": [], + "source": [ + "(x, _), = validation_dataset.take(1)\n", + "\n", + "plt.imshow(tf.squeeze(x))\n", + "print(f\"Data type: {x.dtype}\")\n", + "print(f\"Shape: {x.shape}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V8IvuFkrRJIa" + }, + "source": [ + "To get the latent representation $y$, we need to cast it to `float32`, add a batch dimension, and pass it through the analysis transform." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jA0DOWq23lEq" + }, + "outputs": [], + "source": [ + "x = tf.cast(x, tf.float32) / 255.\n", + "x = tf.reshape(x, (-1, 28, 28, 1))\n", + "y = make_analysis_transform(10)(x)\n", + "\n", + "print(\"y:\", y)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rTojJQvZT8SX" + }, + "source": [ + "The latents will be quantized at test time. To model this in a differentiable way during training, we add uniform noise in the interval $(-.5, .5)$ and call the result $\\tilde y$. This is the same terminology as used in the paper [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Spr3503OUOFQ" + }, + "outputs": [], + "source": [ + "y_tilde = y + tf.random.uniform(y.shape, -.5, .5)\n", + "\n", + "print(\"y_tilde:\", y_tilde)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7hRN89R7SA3U" + }, + "source": [ + "The \"prior\" is a probability density that we train to model the marginal distribution of the noisy latents. For example, it could be a set of independent [logistic distributions](https://en.wikipedia.org/wiki/Logistic_distribution) with different scales for each latent dimension. `tfc.NoisyLogistic` accounts for the fact that the latents have additive noise. As the scale approaches zero, a logistic distribution approaches a dirac delta (spike), but the added noise causes the \"noisy\" distribution to approach the uniform distribution instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2tmA1Bw7ReMY" + }, + "outputs": [], + "source": [ + "prior = tfc.NoisyLogistic(loc=0., scale=tf.linspace(.01, 2., 10))\n", + "\n", + "_ = tf.linspace(-6., 6., 501)[:, None]\n", + "plt.plot(_, prior.prob(_));\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2NSWtBZmUvVY" + }, + "source": [ + "During training, `tfc.ContinuousBatchedEntropyModel` adds uniform noise, and uses the noise and the prior to compute a (differentiable) upper bound on the rate (the average number of bits necessary to encode the latent representation). That bound can be minimized as a loss." 
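As a rough illustration of the paragraph above (ignoring the tail-mass and likelihood-bounding details the entropy model handles internally), the per-example rate is approximately the negative log2-likelihood of the noisy latents under the prior, summed over the latent dimensions; a sketch assuming `prior` and `y_tilde` from the cells above:

approx_bits = -tf.reduce_sum(
    tf.math.log(prior.prob(y_tilde)), axis=-1) / tf.math.log(2.)
print("approximate rate in bits:", approx_bits)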
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hFuGlyJuThBC" + }, + "outputs": [], + "source": [ + "entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " prior, coding_rank=1, compression=False)\n", + "y_tilde, rate = entropy_model(y, training=True)\n", + "\n", + "print(\"rate:\", rate)\n", + "print(\"y_tilde:\", y_tilde)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cyr8DGgmWd32" + }, + "source": [ + "Lastly, the noisy latents are passed back through the synthesis transform to produce an image reconstruction $\\tilde x$. Distortion is the error between original image and reconstruction. Obviously, with the transforms untrained, the reconstruction is not very useful." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gtmI0xGEVym0" + }, + "outputs": [], + "source": [ + "x_tilde = make_synthesis_transform()(y_tilde)\n", + "\n", + "# Mean absolute difference across pixels.\n", + "distortion = tf.reduce_mean(abs(x - x_tilde))\n", + "print(\"distortion:\", distortion)\n", + "\n", + "x_tilde = tf.saturate_cast(x_tilde[0] * 255, tf.uint8)\n", + "plt.imshow(tf.squeeze(x_tilde))\n", + "print(f\"Data type: {x_tilde.dtype}\")\n", + "print(f\"Shape: {x_tilde.shape}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UVz3I7E8ecij" + }, + "source": [ + "For every batch of digits, calling the `MNISTCompressionTrainer` produces the rate and distortion as an average over that batch:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ICJnjj1LeB8L" + }, + "outputs": [], + "source": [ + "(example_batch, _), = validation_dataset.batch(32).take(1)\n", + "trainer = MNISTCompressionTrainer(10)\n", + "example_output = trainer(example_batch)\n", + "\n", + "print(\"rate: \", example_output[\"rate\"])\n", + "print(\"distortion: \", example_output[\"distortion\"])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lgdfRtmee5Mn" + }, + "source": [ + "In the next section, we set up the model to do gradient descent on these two losses." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fKGVwv5MAq6w" + }, + "source": [ + "## Train the model.\n", + "\n", + "We compile the trainer in a way that it optimizes the rate–distortion Lagrangian, that is, a sum of rate and distortion, where one of the terms is weighted by Lagrange parameter $\\lambda$.\n", + "\n", + "This loss function affects the different parts of the model differently:\n", + "- The analysis transform is trained to produce a latent representation that achieves the desired trade-off between rate and distortion.\n", + "- The synthesis transform is trained to minimize distortion, given the latent representation.\n", + "- The parameters of the prior are trained to minimize the rate given the latent representation. This is identical to fitting the prior to the marginal distribution of latents in a maximum likelihood sense." 
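In other words, the scalar being minimized is a weighted sum of the two outputs; a small sketch matching the `loss_weights` used in the next cell:

def rate_distortion_loss(rate, distortion, lmbda):
  # Same weighting as loss_weights=dict(rate=1., distortion=lmbda) below.
  return rate + lmbda * distortion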
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k5mm1aDkcgAf" + }, + "outputs": [], + "source": [ + "def pass_through_loss(_, x):\n", + " # Since rate and distortion are unsupervised, the loss doesn't need a target.\n", + " return x\n", + "\n", + "def make_mnist_compression_trainer(lmbda, latent_dims=50):\n", + " trainer = MNISTCompressionTrainer(latent_dims)\n", + " trainer.compile(\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),\n", + " # Just pass through rate and distortion as losses/metrics.\n", + " loss=dict(rate=pass_through_loss, distortion=pass_through_loss),\n", + " metrics=dict(rate=pass_through_loss, distortion=pass_through_loss),\n", + " loss_weights=dict(rate=1., distortion=lmbda),\n", + " )\n", + " return trainer\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DPwd4DTs3Mfr" + }, + "source": [ + "Next, train the model. The human annotations are not necessary here, since we just want to compress the images, so we drop them using a `map` and instead add \"dummy\" targets for rate and distortion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QNBpCTgzAV7M" + }, + "outputs": [], + "source": [ + "def add_rd_targets(image, label):\n", + " # Training is unsupervised, so labels aren't necessary here. However, we\n", + " # need to add \"dummy\" targets for rate and distortion.\n", + " return image, dict(rate=0., distortion=0.)\n", + "\n", + "def train_mnist_model(lmbda):\n", + " trainer = make_mnist_compression_trainer(lmbda)\n", + " trainer.fit(\n", + " training_dataset.map(add_rd_targets).batch(128).prefetch(8),\n", + " epochs=15,\n", + " validation_data=validation_dataset.map(add_rd_targets).batch(128).cache(),\n", + " validation_freq=1,\n", + " verbose=1,\n", + " )\n", + " return trainer\n", + "\n", + "trainer = train_mnist_model(lmbda=2000)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Td4xuttmCd7T" + }, + "source": [ + "## Compress some MNIST images.\n", + "\n", + "For compression and decompression at test time, we split the trained model in two parts:\n", + "\n", + "- The encoder side consists of the analysis transform and the entropy model.\n", + "- The decoder side consists of the synthesis transform and the same entropy model.\n", + "\n", + "At test time, the latents will not have additive noise, but they will be quantized and then losslessly compressed, so we give them new names. We call them and the image reconstruction $\\hat x$ and $\\hat y$, respectively (following [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704))." 
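Conceptually, the quantization that replaces the training-time noise is rounding, and it is the rounded latents that get range-coded losslessly; a one-line sketch assuming `y` from the walk-through above (the entropy model may also apply an offset internally):

y_hat = tf.round(y)  # stand-in for the values that compress()/decompress() round-trip exactly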
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sBRAPa5jksss" + }, + "outputs": [], + "source": [ + "class MNISTCompressor(tf.keras.Model):\n", + " \"\"\"Compresses MNIST images to strings.\"\"\"\n", + "\n", + " def __init__(self, analysis_transform, entropy_model):\n", + " super().__init__()\n", + " self.analysis_transform = analysis_transform\n", + " self.entropy_model = entropy_model\n", + "\n", + " def call(self, x):\n", + " # Ensure inputs are floats in the range (0, 1).\n", + " x = tf.cast(x, self.compute_dtype) / 255.\n", + " y = self.analysis_transform(x)\n", + " # Also return the exact information content of each digit.\n", + " _, bits = self.entropy_model(y, training=False)\n", + " return self.entropy_model.compress(y), bits\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sSZ0X2xPnkN-" + }, + "outputs": [], + "source": [ + "class MNISTDecompressor(tf.keras.Model):\n", + " \"\"\"Decompresses MNIST images from strings.\"\"\"\n", + "\n", + " def __init__(self, entropy_model, synthesis_transform):\n", + " super().__init__()\n", + " self.entropy_model = entropy_model\n", + " self.synthesis_transform = synthesis_transform\n", + "\n", + " def call(self, string):\n", + " y_hat = self.entropy_model.decompress(string, ())\n", + " x_hat = self.synthesis_transform(y_hat)\n", + " # Scale and cast back to 8-bit integer.\n", + " return tf.saturate_cast(tf.round(x_hat * 255.), tf.uint8)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GI7rxeOUDnaC" + }, + "source": [ + "When instantiated with `compression=True`, the entropy model converts the learned prior into tables for a range coding algorithm. When calling `compress()`, this algorithm is invoked to convert the latent space vector into bit sequences. The length of each binary string approximates the information content of the latent (the negative log likelihood of the latent under the prior).\n", + "\n", + "The entropy model for compression and decompression must be the same instance, because the range coding tables need to be exactly identical on both sides. Otherwise, decoding errors can occur." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dnm_p7mbnigo" + }, + "outputs": [], + "source": [ + "def make_mnist_codec(trainer, **kwargs):\n", + " # The entropy model must be created with `compression=True` and the same\n", + " # instance must be shared between compressor and decompressor.\n", + " entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " trainer.prior, coding_rank=1, compression=True, **kwargs)\n", + " compressor = MNISTCompressor(trainer.analysis_transform, entropy_model)\n", + " decompressor = MNISTDecompressor(entropy_model, trainer.synthesis_transform)\n", + " return compressor, decompressor\n", + "\n", + "compressor, decompressor = make_mnist_codec(trainer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SYu5sVVH3YMv" + }, + "source": [ + "Grab 16 images from the validation dataset. You can select a different subset by changing the argument to `skip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qAxArlU728K5" + }, + "outputs": [], + "source": [ + "(originals, _), = validation_dataset.batch(16).skip(3).take(1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CHeN_ny929YS" + }, + "source": [ + "Compress them to strings, and keep track of each of their information content in bits." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "smOk42gQ3IXv" + }, + "outputs": [], + "source": [ + "strings, entropies = compressor(originals)\n", + "\n", + "print(f\"String representation of first digit in hexadecimal: 0x{strings[0].numpy().hex()}\")\n", + "print(f\"Number of bits actually needed to represent it: {entropies[0]:0.2f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5j9R4bTT3Qhl" + }, + "source": [ + "Decompress the images back from the strings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yOP6pEqU3P0w" + }, + "outputs": [], + "source": [ + "reconstructions = decompressor(strings)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JWo0Q-vy23tt" + }, + "source": [ + "Display each of the 16 original digits together with its compressed binary representation, and the reconstructed digit." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "jU5IqzZzeEpf" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "def display_digits(originals, strings, entropies, reconstructions):\n", + " \"\"\"Visualizes 16 digits together with their reconstructions.\"\"\"\n", + " fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(12.5, 5))\n", + " axes = axes.ravel()\n", + " for i in range(len(axes)):\n", + " image = tf.concat([\n", + " tf.squeeze(originals[i]),\n", + " tf.zeros((28, 14), tf.uint8),\n", + " tf.squeeze(reconstructions[i]),\n", + " ], 1)\n", + " axes[i].imshow(image)\n", + " axes[i].text(\n", + " .5, .5, f\"→ 0x{strings[i].numpy().hex()} →\\n{entropies[i]:0.2f} bits\",\n", + " ha=\"center\", va=\"top\", color=\"white\", fontsize=\"small\",\n", + " transform=axes[i].transAxes)\n", + " axes[i].axis(\"off\")\n", + " plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "km9PqVEtPJPc" + }, + "outputs": [], + "source": [ + "display_digits(originals, strings, entropies, reconstructions)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EzlrIOiYOzJc" + }, + "source": [ + "Note that the length of the encoded string differs from the information content of each digit.\n", + "\n", + "This is because the range coding process works with discrete probabilities, and has a small amount of overhead. So, especially for short strings, the correspondence is only approximate. However, range coding is **asymptotically optimal**: in the limit, the expected bit count will approach the cross entropy (the expected information content), for which the rate term in the training model is an upper bound." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "78qIG8t8FvJW" + }, + "source": [ + "## The rate–distortion trade-off\n", + "\n", + "Above, the model was trained for a specific trade-off (given by `lmbda=2000`) between the average number of bits used to represent each digit and the incurred error in the reconstruction.\n", + "\n", + "What happens when we repeat the experiment with different values?\n", + "\n", + "Let's start by reducing $\\lambda$ to 500." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1iFcAD0WF78p" + }, + "outputs": [], + "source": [ + "def train_and_visualize_model(lmbda):\n", + " trainer = train_mnist_model(lmbda=lmbda)\n", + " compressor, decompressor = make_mnist_codec(trainer)\n", + " strings, entropies = compressor(originals)\n", + " reconstructions = decompressor(strings)\n", + " display_digits(originals, strings, entropies, reconstructions)\n", + "\n", + "train_and_visualize_model(lmbda=500)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uy5OkgJMObMc" + }, + "source": [ + "The bit rate of our code goes down, as does the fidelity of the digits. However, most of the digits remain recognizable.\n", + "\n", + "Let's reduce $\\lambda$ further." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NQp9_9_5GcxH" + }, + "outputs": [], + "source": [ + "train_and_visualize_model(lmbda=300)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3ELLMANN1OwMQ" + }, + "source": [ + "The strings begin to get much shorter now, on the order of one byte per digit. However, this comes at a cost. More digits are becoming unrecognizable.\n", + "\n", + "This demonstrates that this model is agnostic to human perceptions of error, it just measures the absolute deviation in terms of pixel values. To achieve a better perceived image quality, we would need to replace the pixel loss with a perceptual loss." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v9cWHtH0LP_r" + }, + "source": [ + "## Use the decoder as a generative model.\n", + "\n", + "If we feed the decoder random bits, this will effectively sample from the distribution that the model learned to represent digits.\n", + "\n", + "First, re-instantiate the compressor/decompressor without a sanity check that would detect if the input string isn't completely decoded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qnic8YsM0_ke" + }, + "outputs": [], + "source": [ + "compressor, decompressor = make_mnist_codec(trainer, decode_sanity_check=False)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "86uc9_Is1eeo" + }, + "source": [ + "Now, feed long enough random strings into the decompressor so that it can decode/sample digits from them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o4fP7BkqKCHY" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "strings = tf.constant([os.urandom(8) for _ in range(16)])\n", + "samples = decompressor(strings)\n", + "\n", + "fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(5, 5))\n", + "axes = axes.ravel()\n", + "for i in range(len(axes)):\n", + " axes[i].imshow(tf.squeeze(samples[i]))\n", + " axes[i].axis(\"off\")\n", + "plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "data_compression.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/generative/deepdream.ipynb b/site/en/tutorials/generative/deepdream.ipynb index e6d0d85fc17..e4a675ebed6 100644 --- a/site/en/tutorials/generative/deepdream.ipynb +++ b/site/en/tutorials/generative/deepdream.ipynb @@ -103,9 +103,7 @@ "import matplotlib as mpl\n", "\n", "import IPython.display as display\n", - "import PIL.Image\n", - "\n", - "from tensorflow.keras.preprocessing import image" + "import PIL.Image" ] }, { @@ -514,19 +512,20 @@ " @tf.function(\n", " input_signature=(\n", " tf.TensorSpec(shape=[None,None,3], dtype=tf.float32),\n", + " tf.TensorSpec(shape=[2], dtype=tf.int32),\n", " tf.TensorSpec(shape=[], dtype=tf.int32),)\n", " )\n", - " def __call__(self, img, tile_size=512):\n", + " def __call__(self, img, img_size, tile_size=512):\n", " shift, img_rolled = random_roll(img, tile_size)\n", "\n", " # Initialize the image gradients to zero.\n", " gradients = tf.zeros_like(img_rolled)\n", " \n", " # Skip the last tile, unless there's only one tile.\n", - " xs = tf.range(0, img_rolled.shape[0], tile_size)[:-1]\n", + " xs = tf.range(0, img_size[1], tile_size)[:-1]\n", " if not tf.cast(len(xs), bool):\n", " xs = tf.constant([0])\n", - " ys = tf.range(0, img_rolled.shape[1], tile_size)[:-1]\n", + " ys = tf.range(0, img_size[0], tile_size)[:-1]\n", " if not tf.cast(len(ys), bool):\n", " ys = tf.constant([0])\n", "\n", @@ -539,7 +538,7 @@ " tape.watch(img_rolled)\n", "\n", " # Extract a tile out of the image.\n", - " img_tile = img_rolled[x:x+tile_size, y:y+tile_size]\n", + " img_tile = img_rolled[y:y+tile_size, x:x+tile_size]\n", " loss = calc_loss(img_tile, self.model)\n", "\n", " # Update the image gradients for this tile.\n", @@ -585,7 +584,7 @@ "def run_deep_dream_with_octaves(img, steps_per_octave=100, step_size=0.01, \n", " octaves=range(-2,3), octave_scale=1.3):\n", " base_shape = tf.shape(img)\n", - " img = tf.keras.preprocessing.image.img_to_array(img)\n", + " img = tf.keras.utils.img_to_array(img)\n", " img = tf.keras.applications.inception_v3.preprocess_input(img)\n", "\n", " initial_shape = img.shape[:-1]\n", @@ -593,10 +592,11 @@ " for octave in octaves:\n", " # Scale the image based on the octave\n", " new_size = tf.cast(tf.convert_to_tensor(base_shape[:-1]), tf.float32)*(octave_scale**octave)\n", - " img = tf.image.resize(img, tf.cast(new_size, tf.int32))\n", + " new_size = tf.cast(new_size, tf.int32)\n", + " img = tf.image.resize(img, new_size)\n", "\n", " for step in range(steps_per_octave):\n", - " gradients = get_tiled_gradients(img)\n", + " gradients = get_tiled_gradients(img, new_size)\n", " img = img + gradients*step_size\n", " img = tf.clip_by_value(img, -1, 1)\n", "\n", diff --git 
a/site/en/tutorials/generative/pix2pix.ipynb b/site/en/tutorials/generative/pix2pix.ipynb index 82cf08dfab7..e380924d04d 100644 --- a/site/en/tutorials/generative/pix2pix.ipynb +++ b/site/en/tutorials/generative/pix2pix.ipynb @@ -72,13 +72,13 @@ "source": [ "This tutorial demonstrates how to build and train a conditional generative adversarial network (cGAN) called pix2pix that learns a mapping from input images to output images, as described in [Image-to-image translation with conditional adversarial networks](https://arxiv.org/abs/1611.07004) by Isola et al. (2017). pix2pix is not application specific—it can be applied to a wide range of tasks, including synthesizing photos from label maps, generating colorized photos from black and white images, turning Google Maps photos into aerial images, and even transforming sketches into photos.\n", "\n", - "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep it short, you will use a [preprocessed copy]((https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/)) of this dataset created by the pix2pix authors.\n", + "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep it short, you will use a [preprocessed copy](https://efrosgans.eecs.berkeley.edu/pix2pix/datasets/) of this dataset created by the pix2pix authors.\n", "\n", "In the pix2pix cGAN, you condition on input images and generate corresponding output images. cGANs were first proposed in [Conditional Generative Adversarial Nets](https://arxiv.org/abs/1411.1784) (Mirza and Osindero, 2014)\n", "\n", "The architecture of your network will contain:\n", "\n", - "- A generator with a [U-Net]([U-Net](https://arxiv.org/abs/1505.04597))-based architecture.\n", + "- A generator with a [U-Net](https://arxiv.org/abs/1505.04597)-based architecture.\n", "- A discriminator represented by a convolutional PatchGAN classifier (proposed in the [pix2pix paper](https://arxiv.org/abs/1611.07004)).\n", "\n", "Note that each epoch can take around 15 seconds on a single V100 GPU.\n", @@ -125,7 +125,7 @@ "source": [ "## Load the dataset\n", "\n", - "Download the CMP Facade Database data (30MB). Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/). In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB). " + "Download the CMP Facade Database data (30MB). Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/). In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB in size). 
" ] }, { @@ -156,7 +156,9 @@ "\n", "path_to_zip = pathlib.Path(path_to_zip)\n", "\n", - "PATH = path_to_zip.parent/dataset_name" + "extraction_dir = f'{dataset_name}_extracted/{dataset_name}'\n", + "\n", + "PATH = path_to_zip.parent/extraction_dir" ] }, { @@ -226,7 +228,7 @@ "def load(image_file):\n", " # Read and decode an image file to a uint8 tensor\n", " image = tf.io.read_file(image_file)\n", - " image = tf.image.decode_jpeg(image)\n", + " image = tf.io.decode_jpeg(image)\n", "\n", " # Split each image tensor into two tensors:\n", " # - one with a real building facade image\n", @@ -280,7 +282,7 @@ "\n", "1. Resize each `256 x 256` image to a larger height and width—`286 x 286`.\n", "2. Randomly crop it back to `256 x 256`.\n", - "3. Randomly flip the image horizontally i.e. left to right (random mirroring).\n", + "3. Randomly flip the image horizontally i.e., left to right (random mirroring).\n", "4. Normalize the images to the `[-1, 1]` range." ] }, @@ -490,7 +492,7 @@ "source": [ "## Build the generator\n", "\n", - "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597). A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](https://www.tensorflow.org/tutorials/images/segmentation) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).)\n", + "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597). A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](../images/segmentation.ipynb) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).)\n", "\n", "- Each block in the encoder is: Convolution -> Batch normalization -> Leaky ReLU\n", "- Each block in the decoder is: Transposed convolution -> Batch normalization -> Dropout (applied to the first 3 blocks) -> ReLU\n", @@ -1007,8 +1009,7 @@ "id": "Rb0QQFHF-JfS" }, "source": [ - "Note: The `training=True` is intentional here since\n", - "you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." + "Note: The `training=True` is intentional here since you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." ] }, { @@ -1181,7 +1182,8 @@ "\n", "If you work on a local machine, you would launch a separate TensorBoard process. 
When working in a notebook, launch the viewer before starting the training to monitor with TensorBoard.\n", "\n", - "To launch the viewer paste the following into a code-cell:" + "Launch the TensorBoard viewer (Sorry, this doesn't\n", + "display on tensorflow.org):" ] }, { @@ -1199,72 +1201,30 @@ { "cell_type": "markdown", "metadata": { - "id": "Pe0-8Bzg22ox" - }, - "source": [ - "Finally, run the training loop:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a1zZmKmvOH85" - }, - "outputs": [], - "source": [ - "fit(train_dataset, test_dataset, steps=40000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oeq9sByu86-B" - }, - "source": [ - "If you want to share the TensorBoard results _publicly_, you can upload the logs to [TensorBoard.dev](https://tensorboard.dev/) by copying the following into a code-cell.\n", - "\n", - "Note: This requires a Google account.\n", - "\n", - "```\n", - "!tensorboard dev upload --logdir {log_dir}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l-kT7WHRKz-E" + "id": "fyjixlMlBybN" }, "source": [ - "Caution: This command does not terminate. It's designed to continuously upload the results of long-running experiments. Once your data is uploaded you need to stop it using the \"interrupt execution\" option in your notebook tool." + "You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/)." ] }, { "cell_type": "markdown", "metadata": { - "id": "-lGhS_LfwQoL" + "id": "Pe0-8Bzg22ox" }, "source": [ - "You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/).\n", - "\n", - "TensorBoard.dev is a managed experience for hosting, tracking, and sharing ML experiments with everyone.\n", - "\n", - "It can also included inline using an `